]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";

    /**
     * Parsed selector groups. parseSelectorString() appends one entry per
     * comma-separated selector; each entry is the ordered list of rule
     * arrays for that selector.
     *
     * @var array
     */
    protected $selectors = array();

    /**
     * Constructs with the selector string
     *
     * @param string $selector
     */
    public function __construct($selector)
    {
        $this->parseSelectorString($selector);
    }

    /**
     * Returns the selectors that were found in __construct
     *
     * @return array
     */
    public function getSelectors()
    {
        return $this->selectors;
    }

    /**
     * Attempts to find the selectors starting from the given
     * node object.
     *
     * Every parsed selector group is evaluated independently against
     * $node, rule by rule, and the nodes that survive the last rule of
     * each group are appended to one shared collection.
     *
     * @param AbstractNode $node
     * @return Collection
     */
    public function find(AbstractNode $node)
    {
        $results = new Collection;
        foreach ($this->selectors as $selector) {
            if (count($selector) == 0) {
                continue;
            }

            // each group starts its search from the node we were given
            $nodes   = array($node);
            $options = array();
            foreach ($selector as $rule) {
                if ($rule['alterNext']) {
                    // not a real rule: it only modifies how the next one seeks
                    $options[] = $this->alterNext($rule);
                    continue;
                }
                $nodes = $this->seek($nodes, $rule, $options);
                // clear the options
                $options = array();
            }

            // this is the final set of nodes
            foreach ($nodes as $result) {
                $results[] = $result;
            }
        }

        return $results;
    }

    /**
     * Parses the selector string
     *
     * Each regex match becomes one rule array with the keys 'tag', 'key',
     * 'value', 'operator', 'noKey' and 'alterNext'. Rules are collected
     * until a comma separator is seen, at which point the collected rules
     * are stored as one selector group in $this->selectors.
     *
     * @param string $selector
     */
    protected function parseSelectorString($selector)
    {
        $matches = array();
        // the pattern ends with a mandatory separator group, so a space is
        // appended to let the last rule of the string match as well
        preg_match_all($this->pattern, trim($selector).' ', $matches, PREG_SET_ORDER);

        $result = array();
        foreach ($matches as $match) {
            // default values
            $tag       = strtolower(trim($match[1]));
            $operator  = '=';
            $key       = null;
            $value     = null;
            $noKey     = false;
            $alterNext = false;

            // check for elements that alter the behavior of the next element
            if ($tag == '>') {
                $alterNext = true;
            }

            // Captured values are compared against '' rather than tested
            // with empty(): empty('0') is true, which used to discard a
            // literal "0" id, class or attribute value.

            // check for id selector
            if (isset($match[2]) && $match[2] !== '') {
                $key   = 'id';
                $value = $match[2];
            }

            // check for class selector
            if (isset($match[3]) && $match[3] !== '') {
                $key   = 'class';
                $value = $match[3];
            }

            // and final attribute selector
            if ( ! empty($match[4])) {
                $key = strtolower($match[4]);
            }
            if ( ! empty($match[5])) {
                $operator = $match[5];
            }
            if (isset($match[6]) && $match[6] !== '') {
                $value = $match[6];
            }

            // check for elements that do not have a specified attribute
            if (isset($key[0]) && $key[0] == '!') {
                $key   = substr($key, 1);
                $noKey = true;
            }

            $result[] = array(
                'tag'       => $tag,
                'key'       => $key,
                'value'     => $value,
                'operator'  => $operator,
                'noKey'     => $noKey,
                'alterNext' => $alterNext,
            );

            // a comma closes the current selector group
            if (trim($match[7]) == ',') {
                $this->selectors[] = $result;
                $result            = array();
            }
        }

        // save last results
        if (count($result) > 0) {
            $this->selectors[] = $result;
        }
    }

    /**
     * Attempts to find all children that match the rule
     * given.
     *
     * When the rule key is numeric it is treated as a 1-based position:
     * the Nth entry of $nodes whose tag matches the rule tag (or any tag
     * for '*') is returned on its own, and the children are not searched.
     *
     * Otherwise the direct children of every node are tested against the
     * rule. Children that fail but have children of their own are searched
     * recursively, unless the 'checkGrandChildren' option is set to a
     * falsy value.
     *
     * @param array $nodes
     * @param array $rule
     * @param array $options
     * @return array
     * @recursive
     */
    protected function seek(array $nodes, array $rule, array $options)
    {
        // XPath index
        // The original guard also called count() on the tag and key, which
        // are strings (or null). That warns on PHP 7.2+ and throws on
        // PHP 8; on older versions it reduced to exactly this check,
        // because count() of any string is 1 and count(null) is 0.
        if (is_numeric($rule['key'])) {
            $count = 0;
            /** @var AbstractNode $node */
            foreach ($nodes as $node) {
                if ($rule['tag'] == '*' ||
                    $rule['tag'] == $node->getTag()->name()
                ) {
                    ++$count;
                    if ($count == $rule['key']) {
                        // found the node we wanted
                        return array($node);
                    }
                }
            }

            return array();
        }

        $options = $this->flattenOptions($options);
        $descend = ! isset($options['checkGrandChildren']) ||
                   $options['checkGrandChildren'];

        // a bare '*' accepts every child, so the rule checks can be skipped
        $grabAll = $rule['tag'] == '*' && is_null($rule['key']);

        $return = array();
        /** @var InnerNode $node */
        foreach ($nodes as $node) {
            // check if we are a leaf
            if ($node instanceof LeafNode ||
                ! $node->hasChildren()
            ) {
                continue;
            }

            $children = array();
            $child    = $node->firstChild();
            while ( ! is_null($child)) {
                if ($grabAll || $this->childMatchesRule($child, $rule)) {
                    // it passed all checks
                    $return[] = $child;
                } elseif ($child instanceof InnerNode &&
                    $child->hasChildren()
                ) {
                    // this child failed to be matched,
                    // but we still want to check its children
                    $children[] = $child;
                }

                try {
                    // get next child
                    $child = $node->nextChild($child->id());
                } catch (ChildNotFoundException $e) {
                    // no more children
                    $child = null;
                }
            }

            if ($descend && count($children) > 0) {
                // we have children that failed but are not leaves.
                $matches = $this->seek($children, $rule, $options);
                foreach ($matches as $match) {
                    $return[] = $match;
                }
            }
        }

        return $return;
    }

    /**
     * Tells whether a single child node satisfies the tag, key and value
     * parts of a rule. Helper for seek().
     *
     * @param mixed $child node taken from firstChild()/nextChild(); it must
     *                     offer getTag(), getAttribute() and text()
     * @param array $rule
     * @return bool
     */
    protected function childMatchesRule($child, array $rule)
    {
        // check tag
        if ( ! empty($rule['tag']) &&
            $rule['tag'] != $child->getTag()->name() &&
            $rule['tag'] != '*'
        ) {
            // child failed tag check
            return false;
        }

        // nothing else to test for a tag-only rule
        if (is_null($rule['key'])) {
            return true;
        }

        // check key
        if ($rule['noKey']) {
            // the rule asks for the attribute to be absent
            if ( ! is_null($child->getAttribute($rule['key']))) {
                return false;
            }
        } elseif ($rule['key'] != 'plaintext' &&
            is_null($child->getAttribute($rule['key']))
        ) {
            // 'plaintext' is compared against text(), not an attribute
            return false;
        }

        // no value (or a '*' value) means the key check alone decides
        if (is_null($rule['value']) || $rule['value'] == '*') {
            return true;
        }

        // compare values
        if ($rule['key'] == 'plaintext') {
            // plaintext search
            $nodeValue = $child->text();
        } else {
            // normal search
            $nodeValue = $child->getAttribute($rule['key']);
        }

        if ($this->match($rule['operator'], $rule['value'], $nodeValue)) {
            return true;
        }

        // handle multiple classes
        if ($rule['key'] == 'class') {
            // the cast covers "noKey" rules, where the attribute is null
            $childClasses = explode(' ', (string) $child->getAttribute('class'));
            foreach ($childClasses as $class) {
                // skip the empty pieces produced by repeated spaces, but
                // keep a class that is literally "0"
                if ($class !== '' &&
                    $this->match($rule['operator'], $rule['value'], $class)
                ) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Attempts to match the given arguments with the given operator.
*
     * Both operands are lower-cased before they are compared, so every
     * operator is case-insensitive. Supported operators:
     *  - '='  exact match
     *  - '!=' not an exact match
     *  - '^=' value starts with the pattern
     *  - '$=' value ends with the pattern
     *  - '*=' regular expression search; a pattern that begins with '/'
     *         is used as a complete regex, anything else is wrapped in
     *         /.../i as-is
     * Any other operator yields false.
     *
     * @param string $operator
     * @param string $pattern
     * @param string|null $value
     * @return bool
     */
    protected function match($operator, $pattern, $value)
    {
        // cast first: seek() passes the raw attribute lookup, which is
        // null for rules that require the attribute to be absent
        $value   = strtolower((string) $value);
        $pattern = strtolower((string) $pattern);

        // preg_match() returns 1, 0 or false; compare against 1 so the
        // documented bool is what callers actually receive
        switch ($operator) {
            case '=':
                return $value === $pattern;
            case '!=':
                return $value !== $pattern;
            case '^=':
                return preg_match('/^'.preg_quote($pattern, '/').'/', $value) === 1;
            case '$=':
                return preg_match('/'.preg_quote($pattern, '/').'$/', $value) === 1;
            case '*=':
                // NOTE(review): the pattern was lower-cased above, so
                // upper-case regex escapes and flags are altered before
                // the expression runs, and a non-slash pattern is inserted
                // unescaped. Both are kept as-is for compatibility.
                // strncmp() avoids the offset notice on an empty pattern.
                if (strncmp($pattern, '/', 1) === 0) {
                    return preg_match($pattern, $value) === 1;
                }

                return preg_match("/".$pattern."/i", $value) === 1;
        }

        return false;
    }

    /**
     * Attempts to figure out what the alteration will be for
     * the next element.
     *
     * Only the '>' tag is recognised; it turns off the search through
     * grandchildren. Any other rule yields an empty option set.
     *
     * @param array $rule
     * @return array
     */
    protected function alterNext($rule)
    {
        if ($rule['tag'] == '>') {
            return array('checkGrandChildren' => false);
        }

        return array();
    }

    /**
     * Flattens the option array.
     *
     * Takes a list of option arrays and merges them into a single array;
     * when the same key appears more than once the later entry wins.
     *
     * @param array $optionsArray
     * @return array
     */
    protected function flattenOptions(array $optionsArray)
    {
        $flattened = array();
        foreach ($optionsArray as $optionSet) {
            foreach ($optionSet as $name => $setting) {
                $flattened[$name] = $setting;
            }
        }

        return $flattened;
    }
}