Switched to native html parser

This commit is contained in:
Daniil Gentili 2017-02-16 20:04:57 +01:00
parent c021ae362d
commit 2a04787c98
32 changed files with 106 additions and 93 deletions

View File

@ -10,7 +10,7 @@ Logo created by [Matthew Hesketh](http://matthewhesketh.com) (thanks again!).
PHP implementation of MTProto, based on [telepy](https://github.com/griganton/telepy_old). PHP implementation of MTProto, based on [telepy](https://github.com/griganton/telepy_old).
This project can run on PHP 7 and HHVM, only 64 bit systems are supported ATM. This project can run on PHP 7 and HHVM, only 64 bit systems are supported ATM. You must also install the mbstring, curl extensions.
Also note that MadelineProto will perform better if python and a big math extension like gmp or bcmath are installed. Also note that MadelineProto will perform better if python and a big math extension like gmp or bcmath are installed.

View File

@ -18,7 +18,6 @@
"phpseclib/phpseclib": "dev-ige", "phpseclib/phpseclib": "dev-ige",
"vlucas/phpdotenv": "^2.4", "vlucas/phpdotenv": "^2.4",
"krakjoe/pthreads-polyfill": "dev-master", "krakjoe/pthreads-polyfill": "dev-master",
"danog/php-html-parser": "dev-master",
"erusev/parsedown": "^1.6" "erusev/parsedown": "^1.6"
}, },
"require-dev": { "require-dev": {

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -52,7 +52,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -55,7 +55,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -14,7 +14,7 @@ Logo created by [Matthew Hesketh](http://matthewhesketh.com) (thanks again!).
PHP implementation of MTProto, based on [telepy](https://github.com/griganton/telepy_old). PHP implementation of MTProto, based on [telepy](https://github.com/griganton/telepy_old).
This project can run on PHP 7 and HHVM, only 64 bit systems are supported ATM. This project can run on PHP 7 and HHVM, only 64 bit systems are supported ATM. You must also install the mbstring, curl extensions.
Also note that MadelineProto will perform better if python and a big math extension like gmp or bcmath are installed. Also note that MadelineProto will perform better if python and a big math extension like gmp or bcmath are installed.
@ -240,7 +240,7 @@ To specify a custom callback change the correct value in the settings. The speci
### Uploading and downloading files ### Uploading and downloading files
MadelineProto provides wrapper methods to upload and download files. MadelineProto provides wrapper methods to upload and download files that support bot API file ids.
Every method described in this section accepts a last optional parameter with a callable function that will be called during the upload/download using the first parameter to pass a floating point number indicating the upload/download status in percentage. Every method described in this section accepts a last optional parameter with a callable function that will be called during the upload/download using the first parameter to pass a floating point number indicating the upload/download status in percentage.
@ -253,12 +253,13 @@ $inputFile = $MadelineProto->upload('file', 'optional new file name.ext');
$MadelineProto->messages->sendMedia(['peer' => '@pwrtelegramgroup', 'media' => $inputMedia]); $MadelineProto->messages->sendMedia(['peer' => '@pwrtelegramgroup', 'media' => $inputMedia]);
``` ```
To convert the result of sendMedia to a bot API file id select the messageMedia object from the output of the method and pass it to `$MadelineProto->API->MTProto_to_botAPI()`.
See tests/testing.php for more examples. See tests/testing.php for more examples.
There are multiple download methods that allow you to download a file to a directory, to a file or to a stream. There are multiple download methods that allow you to download a file to a directory, to a file or to a stream.
The first parameter of these functions must always be a [messageMediaPhoto](https://daniil.it/MadelineProto/API_docs/constructors/messageMediaPhoto.html) or a [messageMediaDocument](https://daniil.it/MadelineProto/API_docs/constructors/messageMediaDocument.html) object. These objects are usually received in updates, see `bot.php` for examples The first parameter of these functions must always be either a [messageMediaPhoto](https://daniil.it/MadelineProto/API_docs/constructors/messageMediaPhoto.html) or a [messageMediaDocument](https://daniil.it/MadelineProto/API_docs/constructors/messageMediaDocument.html) object or a bot API file id. These objects are usually received in updates, see `bot.php` for examples
``` ```
@ -309,6 +310,8 @@ See tests/testing.php for more examples.
Methods that allow sending message entities (messages.sendMessage for example) also have an additional parse_mode parameter that enables or disables html/markdown parsing of the message to be sent. See the method-specific documentation for more info. Methods that allow sending message entities (messages.sendMessage for example) also have an additional parse_mode parameter that enables or disables html/markdown parsing of the message to be sent. See the method-specific documentation for more info.
To convert the results of methods to bot API objects you must provide a second parameter to method wrappers, containing an array with the `botAPI` key set to true.
Note that when you login as a bot, MadelineProto also logs in using the [PWRTelegram](https://pwrtelegram.xyz) API, to allow persistent storage of peers, even after a logout and another login. Note that when you login as a bot, MadelineProto also logs in using the [PWRTelegram](https://pwrtelegram.xyz) API, to allow persistent storage of peers, even after a logout and another login.
### Storing sessions ### Storing sessions

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -53,7 +53,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -53,7 +53,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -53,7 +53,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -53,7 +53,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -53,7 +53,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -52,7 +52,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -55,7 +55,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -52,7 +52,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -55,7 +55,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -52,7 +52,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -55,7 +55,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -52,7 +52,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -51,7 +51,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -55,7 +55,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -186,7 +186,7 @@ The following tags are currently supported:
``` ```
<br>a newline <br>a newline
<b><i>bold works ok, internal tags aren\'t parsed and shown as is</i> </ b> <b><i>bold works ok, internal tags are stripped</i> </b>
<strong>bold</strong> <strong>bold</strong>
<em>italic</em> <em>italic</em>
<i>italic</i> <i>italic</i>

View File

@ -53,9 +53,10 @@ class Serialization
if (file_exists($filename)) { if (file_exists($filename)) {
$file = fopen($filename, 'r+'); $file = fopen($filename, 'r+');
flock($file, LOCK_EX); flock($file, LOCK_EX);
$unserialized = unserialize(stream_get_contents($file)); $unserialized = stream_get_contents($file);
flock($file, LOCK_UN); flock($file, LOCK_UN);
fclose($file); fclose($file);
$unserialized = unserialize($unserialized);
} else { } else {
throw new Exception('File does not exist'); throw new Exception('File does not exist');
} }

View File

@ -573,7 +573,63 @@ trait TL
return $arguments; return $arguments;
} }
/**
 * Convert one DOM node into message text plus message entities.
 *
 * The node's text is appended to $nmessage. For the supported formatting
 * tags (b/strong, i/em, code, pre, a) an entity describing the appended span
 * is pushed onto $entities. <br> appends a newline, <p> is flattened by
 * processing each of its children, and any other node contributes only its
 * nodeValue (so tags nested inside a formatting tag are stripped).
 *
 * @param object $node      DOM node to convert; nodeName selects the handling
 * @param array  $entities  Entity list, appended to in place
 * @param string $nmessage  Plain-text message built so far, appended to in place
 * @param bool   $recursive Not used by this method; kept so existing callers keep working
 *
 * @return void
 */
public function parse_node($node, &$entities, &$nmessage, $recursive = true)
{
    // Tags that map to an entity carrying nothing but offset and length.
    $plain_entities = [
        'b'      => 'messageEntityBold',
        'strong' => 'messageEntityBold',
        'i'      => 'messageEntityItalic',
        'em'     => 'messageEntityItalic',
        'code'   => 'messageEntityCode',
    ];
    $name = $node->nodeName;

    if ($name === 'br') {
        $nmessage .= "\n";

        return;
    }

    if ($name === 'p') {
        // Use a separate loop variable so the $node parameter is not overwritten.
        foreach ($node->childNodes as $child) {
            $this->parse_node($child, $entities, $nmessage);
        }

        return;
    }

    $is_plain = isset($plain_entities[$name]);
    if (!$is_plain && $name !== 'pre' && $name !== 'a') {
        // Text nodes and unsupported tags: keep their text, emit no entity.
        $nmessage .= $this->html_entity_decode($node->nodeValue);

        return;
    }

    $text = $this->html_entity_decode($node->textContent);
    // The entity starts where the text is about to be appended.
    // NOTE(review): offsets/lengths are counted with mb_strlen (code points);
    // confirm this matches the unit the server expects for characters outside the BMP.
    $entity = ['_' => '', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text)];

    if ($is_plain) {
        $entity['_'] = $plain_entities[$name];
    } elseif ($name === 'pre') {
        $entity['_'] = 'messageEntityPre';
        // getAttribute() yields '' for a missing attribute; the cast also covers null.
        $entity['language'] = (string) $node->getAttribute('language');
    } else {
        $href = (string) $node->getAttribute('href');
        // Only an href that *starts* with "mention:" is a user mention; a normal
        // URL that merely contains that substring must stay a text link.
        if (strpos($href, 'mention:') === 0) {
            $entity['_'] = 'inputMessageEntityMentionName';
            $entity['user_id'] = $this->get_info((string) substr($href, strlen('mention:')))['InputUser'];
        } else {
            $entity['_'] = 'messageEntityTextUrl';
            $entity['url'] = $href;
        }
    }

    $entities[] = $entity;
    $nmessage .= $text;
}
public function parse_mode($arguments) public function parse_mode($arguments)
{ {
if (preg_match('/markdown/i', $arguments['parse_mode'])) { if (preg_match('/markdown/i', $arguments['parse_mode'])) {
@ -581,67 +637,21 @@ trait TL
$arguments['parse_mode'] = 'HTML'; $arguments['parse_mode'] = 'HTML';
} }
if (preg_match('/html/i', $arguments['parse_mode'])) { if (preg_match('/html/i', $arguments['parse_mode'])) {
$dom = new \PHPHtmlParser\Dom(); try {
$dom->loadStr(str_replace("\n", '<br>', $arguments['message']), []); $dom = new \DOMDocument();
$dom->loadHTML(str_replace("\n", '<br>', $arguments['message']));
$nmessage = ''; $nmessage = '';
if (!isset($arguments['entities'])) { if (!isset($arguments['entities'])) {
$arguments['entities'] = []; $arguments['entities'] = [];
} }
foreach ($dom->find('') as $tag) { foreach ($dom->getElementsByTagName('body')->item(0)->childNodes as $node) {
switch ($tag->tag->name()) { $this->parse_node($node, $arguments['entities'], $nmessage);
case 'br':
$nmessage .= "\n";
break;
case 'b':
case 'strong':
$text = $this->html_entity_decode($tag->innerHtml);
$arguments['entities'][] = ['_' => 'messageEntityBold', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text)];
$nmessage .= $text;
break;
case 'i':
case 'em':
$text = $this->html_entity_decode($tag->innerHtml);
$arguments['entities'][] = ['_' => 'messageEntityItalic', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text)];
$nmessage .= $text;
break;
case 'code':
$text = $this->html_entity_decode($tag->innerHtml);
$arguments['entities'][] = ['_' => 'messageEntityCode', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text)];
$nmessage .= $text;
break;
case 'pre':
$text = $this->html_entity_decode($tag->innerHtml);
$language = $tag->getAttribute('language');
if ($language === null) {
$language = '';
}
$arguments['entities'][] = ['_' => 'messageEntityPre', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text), 'language' => $language];
$nmessage .= $text;
break;
case 'a':
$text = $this->html_entity_decode($tag->innerHtml);
$href = $tag->getAttribute('href');
if (preg_match('|mention:|', $href)) {
$arguments['entities'][] = ['_' => 'inputMessageEntityMentionName', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text), 'user_id' => $this->get_info(str_replace('mention:', '', $href))['InputUser']];
} else {
$arguments['entities'][] = ['_' => 'messageEntityTextUrl', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text), 'url' => $href];
}
$nmessage .= $text;
break;
default:
$nmessage .= $this->html_entity_decode($tag->outerHtml);
break;
} }
unset($arguments['parse_mode']);
} catch (\DOMException $e) {
} catch (\danog\MadelineProto\Exception $e) { ; };
} }
$arguments['message'] = $nmessage; $arguments['message'] = $nmessage;
unset($arguments['parse_mode']);
}
return $arguments; return $arguments;
} }