Diff: STRATO-apps/wordpress_03/app/wp-content/plugins/aimogen-pro/res/readability/Nodes/NodeUtility.php

Keine Baseline-Datei – Diff nur gegen leer.
Zur Liste
1 -
1 + <?php
2 +
3 + namespace fivefilters\Readability\Nodes;
4 +
5 + use fivefilters\Readability\Nodes\DOM\DOMDocument;
6 + use fivefilters\Readability\Nodes\DOM\DOMElement;
7 + use fivefilters\Readability\Nodes\DOM\DOMNode;
8 + use fivefilters\Readability\Nodes\DOM\DOMNodeList;
9 +
/**
 * Static helpers for walking and mutating the DOM tree while extracting
 * readable content: tag renaming, node removal, depth-first traversal and
 * text-node filtering, plus the shared regular expressions used to classify
 * nodes.
 */
class NodeUtility
{
    /**
     * Collection of regexps to check the node usability.
     *
     * Keys are symbolic names; values are complete PCRE patterns (delimiters
     * and modifiers included) ready to be handed to the preg_* functions.
     *
     * @var array
     */
    public static $regexps = [
        'unlikelyCandidates' => '/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
        'okMaybeItsACandidate' => '/and|article|body|column|content|main|shadow/i',
        'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
        'byline' => '/byline|author|dateline|writtenby|p-author/i',
        'replaceFonts' => '/<(\/?)font[^>]*>/i',
        'normalize' => '/\s{2,}/',
        'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i',
        'shareElements' => '/(\b|_)(share|sharedaddy)(\b|_)/i',
        'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i',
        'prevLink' => '/(prev|earl|old|new|<|«)/i',
        'tokenize' => '/\W+/',
        'whitespace' => '/^\s*$/',
        'hasContent' => '/\S$/',
        'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i',
        'negative' => '/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
        // \x{00A0} is the unicode version of &nbsp;
        'onlyWhitespace' => '/\x{00A0}|\s+/u',
        'hashUrl' => '/^#.+/',
        'srcsetUrl' => '/(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/',
        'b64DataUrl' => '/^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i',
        // See: https://schema.org/Article
        // NOTE(review): the alternation is not grouped, so "^" only anchors
        // "Article" and "$" only anchors "APIReference"; every other type
        // matches as a substring. Left untouched to keep current matching.
        'jsonLdArticleTypes' => '/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/',
    ];

    /**
     * Finds the next node, starting from the given node, and ignoring
     * whitespace in between.
     *
     * The walk follows nextSibling and stops at the first node that is an
     * element or for which isWhitespace() is false; if the given node already
     * satisfies that, the same node is returned.
     *
     * Imported from the Element class on league\html-to-markdown.
     *
     * @param DOMNode|null $node Starting node; must expose isWhitespace()
     *                           when it is not an element.
     *
     * @return DOMNode|null The node found, or null when the siblings are
     *                      exhausted (or null was passed in).
     */
    public static function nextNode($node)
    {
        $next = $node;
        while ($next
            && $next->nodeType !== XML_ELEMENT_NODE
            && $next->isWhitespace()) {
            $next = $next->nextSibling;
        }

        return $next;
    }

    /**
     * Changes the node tag name. Since tagName on DOMElement is a read only
     * value, this must be done creating a new element with the new tag name
     * and importing it to the main DOMDocument.
     *
     * The children are deep-copied into the new element; the original node
     * (with its original children) is detached from the tree when it had a
     * parent. A node without a parent cannot be replaced in place, so in that
     * case the renamed copy is simply returned unattached.
     *
     * @param DOMNode $node             Node to rename.
     * @param string  $value            New tag name.
     * @param bool    $importAttributes Whether to copy the attributes of
     *                                  $node onto the new element.
     *
     * @return DOMNode The new element, owned by the document of $node.
     */
    public static function setNodeTag($node, $value, $importAttributes = true)
    {
        // Build the replacement inside a scratch document first.
        $scratch = new DOMDocument('1.0', 'utf-8');
        $renamed = $scratch->appendChild($scratch->createElement($value));

        // importNode() clones, so the live child list is not modified while
        // it is being iterated.
        foreach ($node->childNodes as $child) {
            $renamed->appendChild($scratch->importNode($child, true));
        }

        // Only element nodes expose an attribute map; it is null otherwise.
        if ($importAttributes && $node->attributes !== null) {
            // Import attributes from the original node.
            foreach ($node->attributes as $attribute) {
                $renamed->setAttribute($attribute->nodeName, $attribute->nodeValue);
            }
        }

        // The import must be done on the element, not on the scratch
        // document itself, since a DOMDocument cannot be imported.
        $import = $node->ownerDocument->importNode($renamed, true);

        $parent = $node->parentNode;
        if ($parent !== null) {
            $parent->replaceChild($import, $node);
        }

        return $import;
    }

    /**
     * Removes the current node and returns the next node to be parsed
     * (sibling, or sibling of the closest ancestor that has one).
     *
     * The successor is computed before the removal, skipping the node's own
     * children since they go away with it. A node that is already detached
     * is left as is.
     *
     * @param DOMNode $node
     *
     * @return DOMNode|null Next node in document order, or null when there
     *                      is none.
     */
    public static function removeAndGetNext($node)
    {
        $nextNode = self::getNextNode($node, true);
        self::removeNode($node);

        return $nextNode;
    }

    /**
     * Remove the selected node from its parent, if it has one.
     *
     * @param DOMElement $node
     *
     * @return void
     **/
    public static function removeNode($node)
    {
        $parent = $node->parentNode;
        if ($parent !== null) {
            $parent->removeChild($node);
        }
    }

    /**
     * Returns the next node. First checks for children (if the flag allows
     * it), then for siblings, and finally for parents.
     *
     * Traverses the DOM from node to node, starting at the node passed in.
     * Pass true for the second parameter to indicate this node itself (and
     * its kids) are going away, and we want the next node over.
     *
     * Calling this in a loop will traverse the DOM depth-first.
     *
     * @param DOMNode $originalNode
     * @param bool    $ignoreSelfAndKids
     *
     * @return DOMNode|null Next node, or null once the traversal is over.
     */
    public static function getNextNode($originalNode, $ignoreSelfAndKids = false)
    {
        // First check for kids if those aren't being ignored
        if (!$ignoreSelfAndKids && $originalNode->firstChild) {
            return $originalNode->firstChild;
        }

        // Then for siblings...
        if ($originalNode->nextSibling) {
            return $originalNode->nextSibling;
        }

        // And finally, move up the parent chain *and* find a sibling
        // (because this is depth-first traversal, we will have already
        // seen the parent nodes themselves).
        $ancestor = $originalNode->parentNode;
        while ($ancestor && !$ancestor->nextSibling) {
            $ancestor = $ancestor->parentNode;
        }

        return $ancestor ? $ancestor->nextSibling : null;
    }

    /**
     * Remove all empty text nodes from a DOMNodeList.
     *
     * Every non-text node is kept; a text node is kept only when its trimmed
     * value has a non-zero length according to readability_mb_strlen() (a
     * helper defined outside this file, presumably a multibyte-aware strlen).
     * The input list is not modified.
     *
     * @param \DOMNodeList $list
     *
     * @return DOMNodeList New list holding the surviving nodes, in order.
     */
    public static function filterTextNodes(\DOMNodeList $list)
    {
        $newList = new DOMNodeList();
        foreach ($list as $node) {
            $isText = $node->nodeType === XML_TEXT_NODE;
            if (!$isText || readability_mb_strlen(trim($node->nodeValue))) {
                $newList->add($node);
            }
        }

        return $newList;
    }
}
193 +