Switched to native html parser
This commit is contained in:
parent
c021ae362d
commit
2a04787c98
@ -10,7 +10,7 @@ Logo created by [Matthew Hesketh](http://matthewhesketh.com) (thanks again!).
|
||||
|
||||
PHP implementation of MTProto, based on [telepy](https://github.com/griganton/telepy_old).
|
||||
|
||||
This project can run on PHP 7 and HHVM, only 64 bit systems are supported ATM.
|
||||
This project can run on PHP 7 and HHVM, only 64 bit systems are supported ATM. You must also install the mbstring and curl extensions.
|
||||
|
||||
Also note that MadelineProto will perform better if python and a big math extension like gmp or bcmath are installed.
|
||||
|
||||
|
@ -18,7 +18,6 @@
|
||||
"phpseclib/phpseclib": "dev-ige",
|
||||
"vlucas/phpdotenv": "^2.4",
|
||||
"krakjoe/pthreads-polyfill": "dev-master",
|
||||
"danog/php-html-parser": "dev-master",
|
||||
"erusev/parsedown": "^1.6"
|
||||
},
|
||||
"require-dev": {
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -52,7 +52,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -55,7 +55,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -14,7 +14,7 @@ Logo created by [Matthew Hesketh](http://matthewhesketh.com) (thanks again!).
|
||||
|
||||
PHP implementation of MTProto, based on [telepy](https://github.com/griganton/telepy_old).
|
||||
|
||||
This project can run on PHP 7 and HHVM, only 64 bit systems are supported ATM.
|
||||
This project can run on PHP 7 and HHVM, only 64 bit systems are supported ATM. You must also install the mbstring and curl extensions.
|
||||
|
||||
Also note that MadelineProto will perform better if python and a big math extension like gmp or bcmath are installed.
|
||||
|
||||
@ -240,7 +240,7 @@ To specify a custom callback change the correct value in the settings. The speci
|
||||
|
||||
### Uploading and downloading files
|
||||
|
||||
MadelineProto provides wrapper methods to upload and download files.
|
||||
MadelineProto provides wrapper methods to upload and download files that support bot API file ids.
|
||||
|
||||
Every method described in this section accepts a last optional parameter with a callable function that will be called during the upload/download using the first parameter to pass a floating point number indicating the upload/download status in percentage.
|
||||
|
||||
@ -253,12 +253,13 @@ $inputFile = $MadelineProto->upload('file', 'optional new file name.ext');
|
||||
$MadelineProto->messages->sendMedia(['peer' => '@pwrtelegramgroup', 'media' => $inputMedia]);
|
||||
```
|
||||
|
||||
To convert the result of sendMedia to a bot API file id select the messageMedia object from the output of the method and pass it to `$MadelineProto->API->MTProto_to_botAPI()`.
|
||||
|
||||
See tests/testing.php for more examples.
|
||||
|
||||
|
||||
There are multiple download methods that allow you to download a file to a directory, to a file or to a stream.
|
||||
The first parameter of these functions must always be a [messageMediaPhoto](https://daniil.it/MadelineProto/API_docs/constructors/messageMediaPhoto.html) or a [messageMediaDocument](https://daniil.it/MadelineProto/API_docs/constructors/messageMediaDocument.html) object. These objects are usually received in updates, see `bot.php` for examples
|
||||
The first parameter of these functions must always be either a [messageMediaPhoto](https://daniil.it/MadelineProto/API_docs/constructors/messageMediaPhoto.html) or a [messageMediaDocument](https://daniil.it/MadelineProto/API_docs/constructors/messageMediaDocument.html) object or a bot API file id. These objects are usually received in updates, see `bot.php` for examples
|
||||
|
||||
|
||||
```
|
||||
@ -309,6 +310,8 @@ See tests/testing.php for more examples.
|
||||
|
||||
Methods that allow sending message entities (messages.sendMessage for example) also have an additional parse_mode parameter that enables or disables html/markdown parsing of the message to be sent. See the method-specific documentation for more info.
|
||||
|
||||
To convert the results of methods to bot API objects you must provide a second parameter to method wrappers, containing an array with the `botAPI` key set to true.
|
||||
|
||||
Note that when you login as a bot, MadelineProto also logs in using the [PWRTelegram](https://pwrtelegram.xyz) API, to allow persistent storage of peers, even after a logout and another login.
|
||||
|
||||
### Storing sessions
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -53,7 +53,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -53,7 +53,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -53,7 +53,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -53,7 +53,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -53,7 +53,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -52,7 +52,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -55,7 +55,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -52,7 +52,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -55,7 +55,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -52,7 +52,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -55,7 +55,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -52,7 +52,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -51,7 +51,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -55,7 +55,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren't parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -186,7 +186,7 @@ The following tags are currently supported:
|
||||
|
||||
```
|
||||
<br>a newline
|
||||
<b><i>bold works ok, internal tags aren\'t parsed and shown as is</i> </ b>
|
||||
<b><i>bold works ok, internal tags are stripped</i> </b>
|
||||
<strong>bold</strong>
|
||||
<em>italic</em>
|
||||
<i>italic</i>
|
||||
|
@ -53,9 +53,10 @@ class Serialization
|
||||
if (file_exists($filename)) {
|
||||
$file = fopen($filename, 'r+');
|
||||
flock($file, LOCK_EX);
|
||||
$unserialized = unserialize(stream_get_contents($file));
|
||||
$unserialized = stream_get_contents($file);
|
||||
flock($file, LOCK_UN);
|
||||
fclose($file);
|
||||
$unserialized = unserialize($unserialized);
|
||||
} else {
|
||||
throw new Exception('File does not exist');
|
||||
}
|
||||
|
@ -573,7 +573,63 @@ trait TL
|
||||
|
||||
return $arguments;
|
||||
}
|
||||
/**
 * Appends the text of a single parsed HTML node to the outgoing message and,
 * for supported tags, records the matching message entity.
 *
 * Handled tags: br (newline), b/strong, i/em, code, pre, p (children are
 * processed one by one through this same method) and a (text URL, or a
 * mention when the href contains "mention:"). Any other node contributes
 * only its decoded nodeValue.
 *
 * Entity offsets are taken as mb_strlen() of the message built so far, and
 * lengths as mb_strlen() of the decoded text of the node.
 *
 * @param object $node      Node to convert; parse_mode() passes the children
 *                          of the <body> element of a \DOMDocument.
 * @param array  $entities  Entity list, extended in place.
 * @param string $nmessage  Plain-text message, extended in place.
 * @param bool   $recursive Accepted for callers but not read by this method.
 *
 * @return void
 */
public function parse_node($node, &$entities, &$nmessage, $recursive = true)
{
    $tagName = $node->nodeName;

    // Line breaks carry no entity, only a newline in the text.
    if ($tagName === 'br') {
        $nmessage .= "\n";

        return;
    }

    // Paragraphs are transparent containers: every child is handled on its own.
    if ($tagName === 'p') {
        foreach ($node->childNodes as $child) {
            $this->parse_node($child, $entities, $nmessage);
        }

        return;
    }

    // Tags whose entity needs nothing beyond the type, offset and length.
    $simpleEntities = [
        'b'      => 'messageEntityBold',
        'strong' => 'messageEntityBold',
        'i'      => 'messageEntityItalic',
        'em'     => 'messageEntityItalic',
        'code'   => 'messageEntityCode',
    ];

    if (isset($simpleEntities[$tagName])) {
        $content = $this->html_entity_decode($node->textContent);
        $entities[] = [
            '_'      => $simpleEntities[$tagName],
            'offset' => mb_strlen($nmessage),
            'length' => mb_strlen($content),
        ];
        $nmessage .= $content;

        return;
    }

    if ($tagName === 'pre') {
        $content = $this->html_entity_decode($node->textContent);
        $lang = $node->getAttribute('language');
        $entities[] = [
            '_'        => 'messageEntityPre',
            'offset'   => mb_strlen($nmessage),
            'length'   => mb_strlen($content),
            'language' => $lang === null ? '' : $lang,
        ];
        $nmessage .= $content;

        return;
    }

    if ($tagName === 'a') {
        $content = $this->html_entity_decode($node->textContent);
        $target = $node->getAttribute('href');
        if (preg_match('|mention:|', $target)) {
            // The peer is resolved before the entity is stored, so a failed
            // lookup leaves both $entities and $nmessage untouched.
            $peer = $this->get_info(str_replace('mention:', '', $target));
            $entities[] = [
                '_'       => 'inputMessageEntityMentionName',
                'offset'  => mb_strlen($nmessage),
                'length'  => mb_strlen($content),
                'user_id' => $peer['InputUser'],
            ];
        } else {
            $entities[] = [
                '_'      => 'messageEntityTextUrl',
                'offset' => mb_strlen($nmessage),
                'length' => mb_strlen($content),
                'url'    => $target,
            ];
        }
        $nmessage .= $content;

        return;
    }

    // Everything else (plain text nodes, unsupported tags) is appended as text only.
    $nmessage .= $this->html_entity_decode($node->nodeValue);
}
|
||||
public function parse_mode($arguments)
|
||||
{
|
||||
if (preg_match('/markdown/i', $arguments['parse_mode'])) {
|
||||
@ -581,67 +637,21 @@ trait TL
|
||||
$arguments['parse_mode'] = 'HTML';
|
||||
}
|
||||
if (preg_match('/html/i', $arguments['parse_mode'])) {
|
||||
$dom = new \PHPHtmlParser\Dom();
|
||||
$dom->loadStr(str_replace("\n", '<br>', $arguments['message']), []);
|
||||
$nmessage = '';
|
||||
if (!isset($arguments['entities'])) {
|
||||
$arguments['entities'] = [];
|
||||
}
|
||||
foreach ($dom->find('') as $tag) {
|
||||
switch ($tag->tag->name()) {
|
||||
case 'br':
|
||||
$nmessage .= "\n";
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
case 'strong':
|
||||
$text = $this->html_entity_decode($tag->innerHtml);
|
||||
$arguments['entities'][] = ['_' => 'messageEntityBold', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text)];
|
||||
$nmessage .= $text;
|
||||
break;
|
||||
|
||||
case 'i':
|
||||
case 'em':
|
||||
$text = $this->html_entity_decode($tag->innerHtml);
|
||||
$arguments['entities'][] = ['_' => 'messageEntityItalic', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text)];
|
||||
$nmessage .= $text;
|
||||
break;
|
||||
|
||||
case 'code':
|
||||
$text = $this->html_entity_decode($tag->innerHtml);
|
||||
$arguments['entities'][] = ['_' => 'messageEntityCode', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text)];
|
||||
$nmessage .= $text;
|
||||
break;
|
||||
|
||||
case 'pre':
|
||||
$text = $this->html_entity_decode($tag->innerHtml);
|
||||
$language = $tag->getAttribute('language');
|
||||
if ($language === null) {
|
||||
$language = '';
|
||||
}
|
||||
$arguments['entities'][] = ['_' => 'messageEntityPre', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text), 'language' => $language];
|
||||
$nmessage .= $text;
|
||||
break;
|
||||
|
||||
case 'a':
|
||||
$text = $this->html_entity_decode($tag->innerHtml);
|
||||
$href = $tag->getAttribute('href');
|
||||
if (preg_match('|mention:|', $href)) {
|
||||
$arguments['entities'][] = ['_' => 'inputMessageEntityMentionName', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text), 'user_id' => $this->get_info(str_replace('mention:', '', $href))['InputUser']];
|
||||
} else {
|
||||
$arguments['entities'][] = ['_' => 'messageEntityTextUrl', 'offset' => mb_strlen($nmessage), 'length' => mb_strlen($text), 'url' => $href];
|
||||
}
|
||||
$nmessage .= $text;
|
||||
break;
|
||||
|
||||
default:
|
||||
$nmessage .= $this->html_entity_decode($tag->outerHtml);
|
||||
break;
|
||||
}
|
||||
}
|
||||
$arguments['message'] = $nmessage;
|
||||
unset($arguments['parse_mode']);
|
||||
try {
|
||||
$dom = new \DOMDocument();
|
||||
$dom->loadHTML(str_replace("\n", '<br>', $arguments['message']));
|
||||
$nmessage = '';
|
||||
if (!isset($arguments['entities'])) {
|
||||
$arguments['entities'] = [];
|
||||
}
|
||||
foreach ($dom->getElementsByTagName('body')->item(0)->childNodes as $node) {
|
||||
$this->parse_node($node, $arguments['entities'], $nmessage);
|
||||
}
|
||||
unset($arguments['parse_mode']);
|
||||
} catch (\DOMException $e) {
|
||||
} catch (\danog\MadelineProto\Exception $e) { ; };
|
||||
}
|
||||
$arguments['message'] = $nmessage;
|
||||
|
||||
return $arguments;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user