Diff: STRATO-apps/wordpress_03/app/wp-content/plugins/aimogen-pro/res/readability/Nodes/NodeUtility.php
Keine Baseline-Datei – Diff nur gegen leer.
1
-
1
+
<?php
2
+
3
+
namespace fivefilters\Readability\Nodes;
4
+
5
+
use fivefilters\Readability\Nodes\DOM\DOMDocument;
6
+
use fivefilters\Readability\Nodes\DOM\DOMElement;
7
+
use fivefilters\Readability\Nodes\DOM\DOMNode;
8
+
use fivefilters\Readability\Nodes\DOM\DOMNodeList;
9
+
10
+
/**
 * Class NodeUtility.
 *
 * Static helpers for walking and mutating a DOM tree (finding the next node,
 * renaming an element, removing nodes, filtering node lists), plus the shared
 * table of regular expressions exposed through NodeUtility::$regexps.
 */
class NodeUtility
{
    /**
     * Collection of regexps to check the node usability.
     *
     * Keys are symbolic names, values are complete PCRE patterns (delimiters and
     * modifiers included) ready to be handed to the preg_* functions.
     *
     * @var array
     */
    public static $regexps = [
        'unlikelyCandidates' => '/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
        'okMaybeItsACandidate' => '/and|article|body|column|content|main|shadow/i',
        'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
        'byline' => '/byline|author|dateline|writtenby|p-author/i',
        'replaceFonts' => '/<(\/?)font[^>]*>/i',
        'normalize' => '/\s{2,}/',
        'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i',
        'shareElements' => '/(\b|_)(share|sharedaddy)(\b|_)/i',
        'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i',
        'prevLink' => '/(prev|earl|old|new|<|«)/i',
        'tokenize' => '/\W+/',
        'whitespace' => '/^\s*$/',
        'hasContent' => '/\S$/',
        'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i',
        'negative' => '/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
        // \x{00A0} is the Unicode code point of the non-breaking space (&nbsp;).
        'onlyWhitespace' => '/\x{00A0}|\s+/u',
        'hashUrl' => '/^#.+/',
        'srcsetUrl' => '/(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/',
        'b64DataUrl' => '/^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i',
        // See: https://schema.org/Article
        // NOTE(review): the alternation is not grouped, so "^" only anchors "Article" and "$" only anchors
        // "APIReference"; every other alternative matches anywhere in the subject. Kept byte-identical on
        // purpose - confirm the intended strictness before wrapping the list in ^(?:...)$.
        'jsonLdArticleTypes' => '/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/',
    ];

    /**
     * Finds the next node, starting from the given node, and ignoring
     * whitespace in between. If the given node is an element, the same node is
     * returned.
     *
     * Imported from the Element class on league\html-to-markdown.
     *
     * The node (and each sibling visited) must expose an isWhitespace() method;
     * that method is not defined in this class.
     *
     * @param DOMNode|null $node Node to start from.
     *
     * @return DOMNode|null The first node that is an element or is not whitespace, or null when the
     *                      sibling chain is exhausted (or $node itself was null).
     */
    public static function nextNode($node)
    {
        $next = $node;
        // Skip forward over non-element nodes that report themselves as whitespace.
        while ($next
            && $next->nodeType !== XML_ELEMENT_NODE
            && $next->isWhitespace()) {
            $next = $next->nextSibling;
        }

        return $next;
    }

    /**
     * Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new
     * element with the new tag name and importing it to the main DOMDocument.
     *
     * The children are deep-copied into the new element, so the returned node and its descendants are
     * new objects; the original node is detached from its parent when it has one.
     *
     * @param DOMNode $node             Node to rename.
     * @param string  $value            New tag name.
     * @param bool    $importAttributes Whether to copy the attributes of the original node.
     *
     * @return DOMNode The new element, owned by the document of $node. When $node has no parent there is
     *                 nothing to replace and the new element is returned unattached.
     */
    public static function setNodeTag($node, $value, $importAttributes = true)
    {
        // Build the renamed element inside a scratch document first.
        $scratch = new DOMDocument('1.0', 'utf-8');
        $renamed = $scratch->appendChild($scratch->createElement($value));

        // importNode() copies, so iterating the live child list is safe here.
        foreach ($node->childNodes as $child) {
            $renamed->appendChild($scratch->importNode($child, true));
        }

        // Import attributes from the original node. Non-element nodes expose null here.
        if ($importAttributes && $node->attributes !== null) {
            foreach ($node->attributes as $attribute) {
                $renamed->setAttribute($attribute->nodeName, $attribute->nodeValue);
            }
        }

        // The import must be done on the element itself, since $scratch is a DOMDocument and not a DOMElement.
        $import = $node->ownerDocument->importNode($renamed, true);

        // A detached node has no parent to swap it in; avoid calling replaceChild() on null.
        $parent = $node->parentNode;
        if ($parent) {
            $parent->replaceChild($import, $node);
        }

        return $import;
    }

    /**
     * Removes the current node and returns the next node to be parsed (sibling, or a sibling of an ancestor).
     *
     * The next node is resolved before the removal, because the removed node loses its sibling and
     * parent links. A node without a parent is left untouched instead of raising an error.
     *
     * @param DOMNode $node
     *
     * @return DOMNode|null Next node in document order that is outside of $node, or null if there is none.
     */
    public static function removeAndGetNext($node)
    {
        $nextNode = self::getNextNode($node, true);
        // Delegate to removeNode() so the null-parent guard lives in one place.
        self::removeNode($node);

        return $nextNode;
    }

    /**
     * Remove the selected node from its parent, if it has one.
     *
     * @param DOMElement $node
     *
     * @return void
     **/
    public static function removeNode($node)
    {
        $parent = $node->parentNode;
        if ($parent) {
            $parent->removeChild($node);
        }
    }

    /**
     * Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally
     * for parents.
     *
     * Traverse the DOM from node to node, starting at the node passed in.
     * Pass true for the second parameter to indicate this node itself
     * (and its kids) are going away, and we want the next node over.
     *
     * Calling this in a loop will traverse the DOM depth-first.
     *
     * @param DOMNode $originalNode
     * @param bool    $ignoreSelfAndKids
     *
     * @return DOMNode|null The next node, or null once the end of the tree is reached.
     */
    public static function getNextNode($originalNode, $ignoreSelfAndKids = false)
    {
        // First check for kids if those aren't being ignored
        if (!$ignoreSelfAndKids && $originalNode->firstChild) {
            return $originalNode->firstChild;
        }

        // Then for siblings...
        if ($originalNode->nextSibling) {
            return $originalNode->nextSibling;
        }

        // And finally, move up the parent chain *and* find a sibling
        // (because this is depth-first traversal, we will have already
        // seen the parent nodes themselves).
        do {
            $originalNode = $originalNode->parentNode;
        } while ($originalNode && !$originalNode->nextSibling);

        return ($originalNode) ? $originalNode->nextSibling : $originalNode;
    }

    /**
     * Remove all empty text nodes from a DOMNodeList.
     *
     * Every node that is not a text node is kept; a text node is kept only when its value still has
     * content after trim(). The input list is not modified.
     *
     * @param \DOMNodeList $list
     *
     * @return DOMNodeList New list holding the retained nodes in their original order.
     */
    public static function filterTextNodes(\DOMNodeList $list)
    {
        $newList = new DOMNodeList();
        foreach ($list as $node) {
            if ($node->nodeType !== XML_TEXT_NODE || readability_mb_strlen(trim($node->nodeValue))) {
                $newList->add($node);
            }
        }

        return $newList;
    }
}
193
+