Huge refactoring, custom schema changes, dump-schemas command

This commit is contained in:
Sys 2022-04-15 23:33:46 +02:00
parent 1a22f3eda9
commit b63c45b5a4
No known key found for this signature in database
GPG Key ID: 3CD2C29F8AB39BFD
18 changed files with 1119 additions and 907 deletions

View File

@ -8,6 +8,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## [4.0.0] - 2022-04-15
### Added
- Support for bot API 5.6.0 and 5.7.0.
- New `app:dump-schemas` command, used to generate schemas for all bot API versions.
- New `default` property for fields, containing their default values when they're unspecified.
### Changed
- (**Breaking change**) Array format in custom schema has been changed from `Array<Foo, Bar, Baz>` to `Array<Foo|Bar|Baz>`.
- Updated dependencies to their latest available version.
### Fixed
- Increased speed dramatically by replacing the DOM parser; it's a lot faster now!
- Huge refactoring, improved code quality and readability.
- Some minor bug fixes.
## [3.0.3] - 2021-12-11
### Added
- Support for bot API 5.5.0.
@ -150,7 +165,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- The parser is now more reliable, it no longer needs to be updated at every bot API release!
[Unreleased]: https://github.com/Sysbot-org/tgscraper/compare/3.0.3...HEAD
[Unreleased]: https://github.com/Sysbot-org/tgscraper/compare/4.0...HEAD
[4.0.0]: https://github.com/Sysbot-org/tgscraper/compare/3.0.3...4.0
[3.0.3]: https://github.com/Sysbot-org/tgscraper/compare/3.0.2...3.0.3
[3.0.2]: https://github.com/Sysbot-org/tgscraper/compare/3.0.1...3.0.2
[3.0.1]: https://github.com/Sysbot-org/tgscraper/compare/3.0...3.0.1

View File

@ -87,6 +87,16 @@ Create stubs in the `out/` directory using `Sysbot\Telegram` as namespace prefix
$ vendor/bin/tgscraper app:create-stubs --namespace-prefix "Sysbot\Telegram" out
```
### All versions
If you want to generate all schemas and stubs for every Bot API version, you can!
Here's an example of how to export everything to the `out/` directory, with schemas in human-readable format and using `Sysbot\Telegram` as namespace prefix for the stubs:
```bash
$ vendor/bin/tgscraper app:dump-schemas -r --namespace-prefix "Sysbot\Telegram" out
```
## Custom format
If you're interested in the custom format generated by TGScraper, you can find its schema [here](docs/schema.json).

View File

@ -1,9 +1,11 @@
#!/usr/bin/env php
<?php
use Composer\InstalledVersions;
use Symfony\Component\Console\Application;
use TgScraper\Commands\CreateStubsCommand;
use TgScraper\Commands\DumpSchemasCommand;
use TgScraper\Commands\ExportSchemaCommand;
$autoloadFiles = [
@ -22,10 +24,11 @@ $application = new Application('TGScraper', InstalledVersions::getVersion('sysbo
$application->add(new CreateStubsCommand());
$application->add(new ExportSchemaCommand());
$application->add(new DumpSchemasCommand());
try {
$exitCode = $application->run();
} catch (Exception $e) {
} catch (Throwable $e) {
echo $e->getMessage() . PHP_EOL;
}

View File

@ -6,11 +6,12 @@
"php": ">=8.0",
"ext-json": "*",
"composer-runtime-api": "^2.0",
"nette/php-generator": "^3.5",
"paquettg/php-html-parser": "^3.1",
"guzzlehttp/guzzle": "^7.0",
"nette/php-generator": "^4.0",
"psr/log": "^1.1",
"symfony/console": "^5.3",
"symfony/yaml": "^5.3"
"symfony/console": "^6.0",
"symfony/yaml": "^6.0",
"voku/simple_html_dom": "^4.7"
},
"suggest": {
"sysbot/tgscraper-cache": "To speed up schema fetching and generation."
@ -20,6 +21,11 @@
"TgScraper\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"TgScraper\\Tests\\": "tests/"
}
},
"bin": [
"bin/tgscraper"
],

950
composer.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -61,7 +61,8 @@
"name",
"types",
"optional",
"description"
"description",
"default"
],
"properties": {
"name": {
@ -73,11 +74,27 @@
"type": "string"
}
},
"required": {
"optional": {
"type": "boolean"
},
"description": {
"type": "string"
},
"default": {
"oneOf": [
{
"type": "boolean"
},
{
"type": "integer"
},
{
"type": "object"
},
{
"type": "string"
}
]
}
}
},

19
psalm.xml Normal file
View File

@ -0,0 +1,19 @@
<?xml version="1.0"?>
<psalm
errorLevel="4"
resolveFromConfigFile="true"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="https://getpsalm.org/schema/config"
xsi:schemaLocation="https://getpsalm.org/schema/config vendor/vimeo/psalm/config.xsd"
>
<projectFiles>
<directory name="src" />
<ignoreFiles>
<directory name="vendor" />
</ignoreFiles>
</projectFiles>
<issueHandlers>
<TooManyTemplateParams errorLevel="suppress" />
<NoInterfaceProperties errorLevel="suppress" />
</issueHandlers>
</psalm>

33
src/Commands/Common.php Normal file
View File

@ -0,0 +1,33 @@
<?php
namespace TgScraper\Commands;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Logger\ConsoleLogger;
use Symfony\Component\Console\Output\OutputInterface;
/**
 * Helpers shared by the console commands.
 */
trait Common
{
    /**
     * Writes $data to $destination and reports the outcome.
     *
     * On failure a critical message is sent to the logger. On success the
     * "Done!" message goes to the logger (info level) when $log is true,
     * otherwise it is written directly to the console output.
     *
     * @param ConsoleLogger $logger Logger used for the failure message and, when $log is true, the success message.
     * @param OutputInterface $output Console output used for the success message when $log is false.
     * @param string $destination Path of the file to write.
     * @param string $data Contents to write.
     * @param string|null $prefix Optional text prepended to every message.
     * @param bool $log Whether the success message goes to the logger instead of the output.
     * @return int Command::SUCCESS when the file was written, Command::FAILURE otherwise.
     */
    protected function saveFile(
        ConsoleLogger $logger,
        OutputInterface $output,
        string $destination,
        string $data,
        ?string $prefix = null,
        bool $log = true
    ): int {
        if (file_put_contents($destination, $data) === false) {
            $logger->critical($prefix . 'Unable to save file to ' . $destination);
            return Command::FAILURE;
        }

        $message = $prefix . 'Done!';
        if ($log) {
            $logger->info($message);
        } else {
            $output->writeln($message);
        }

        return Command::SUCCESS;
    }
}

View File

@ -0,0 +1,167 @@
<?php
namespace TgScraper\Commands;
use Exception;
use FilesystemIterator;
use PharData;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use ReflectionClass;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Logger\ConsoleLogger;
use Symfony\Component\Console\Output\OutputInterface;
use TgScraper\Common\Encoder;
use TgScraper\Constants\Versions;
use TgScraper\TgScraper;
use Throwable;
/**
 * Console command that generates, for every known bot API version, the custom
 * schema (JSON/YAML), the Postman collection, the OpenAPI schema (JSON/YAML)
 * and a zip archive of the stubs.
 */
class DumpSchemasCommand extends Command
{
    use Common;

    protected static $defaultName = 'app:dump-schemas';

    /**
     * Sub-directories, relative to the destination, that receive the generated files.
     */
    private const OUTPUT_DIRECTORIES = [
        'custom/json',
        'custom/yaml',
        'postman',
        'openapi/json',
        'openapi/yaml',
        'stubs',
    ];

    /**
     * Recursively deletes a directory and everything inside it.
     *
     * @param string $directory Directory to remove.
     */
    protected static function rrmdir(string $directory): void
    {
        // CHILD_FIRST yields the contents of a directory before the directory
        // itself, so every rmdir() below operates on an already emptied directory.
        $files = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($directory, FilesystemIterator::SKIP_DOTS),
            RecursiveIteratorIterator::CHILD_FIRST
        );
        foreach ($files as $fileInfo) {
            $todo = ($fileInfo->isDir() ? 'rmdir' : 'unlink');
            $todo($fileInfo->getRealPath());
        }
        rmdir($directory);
    }

    /**
     * Creates a directory (and its parents) unless it already exists.
     *
     * mkdir() reports failure through its return value rather than an
     * exception, so the result is checked and converted explicitly.
     *
     * @param string $directory Directory to create.
     * @throws Exception When the directory cannot be created.
     */
    private static function ensureDirectory(string $directory): void
    {
        if (is_dir($directory)) {
            return;
        }
        // The second is_dir() covers the directory appearing between the check above and mkdir().
        if (!mkdir($directory, 0755, true) && !is_dir($directory)) {
            throw new Exception(sprintf('Unable to create directory "%s"', $directory));
        }
    }

    /**
     * Declares the command description, its destination argument and its options.
     */
    protected function configure(): void
    {
        $this
            ->setDescription('Export all schemas and stubs to a directory.')
            ->setHelp('This command allows you to generate the schemas for all versions of the Telegram bot API.')
            ->addArgument('destination', InputArgument::REQUIRED, 'Destination directory')
            ->addOption(
                'namespace-prefix',
                null,
                InputOption::VALUE_REQUIRED,
                'Namespace prefix for stubs',
                'TelegramApi'
            )
            ->addOption(
                'readable',
                'r',
                InputOption::VALUE_NONE,
                'Generate human-readable files'
            );
    }

    /**
     * Generates and saves every export for every bot API version.
     *
     * @param InputInterface $input Provides the destination argument and the command options.
     * @param OutputInterface $output Console output used for progress messages.
     * @return int Command::SUCCESS when everything was written, Command::FAILURE on the first error.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        // Invoked through Closure::call() so that $this is the generator:
        // overwrites its version with the one being dumped.
        $versionReplacer = function (string $ver) {
            /** @noinspection PhpUndefinedFieldInspection */
            $this->version = $ver;
        };
        $logger = new ConsoleLogger($output);
        $destination = $input->getArgument('destination');
        $readable = $input->getOption('readable');

        $output->writeln('Creating directory tree...');
        try {
            $destination = TgScraper::getTargetDirectory($destination);
            foreach (self::OUTPUT_DIRECTORIES as $subDirectory) {
                self::ensureDirectory($destination . '/' . $subDirectory);
            }
        } catch (Exception $e) {
            $logger->critical((string)$e);
            return Command::FAILURE;
        }

        // Every version with a known URL, except the "latest" alias.
        $versions = array_diff(array_keys(Versions::URLS), [Versions::LATEST]);
        foreach ($versions as $version) {
            $output->writeln(sprintf('Generating v%s schemas...', $version));
            $filename = 'v' . str_replace('.', '', $version);
            try {
                $logger->info($version . ': Fetching data...');
                $generator = TgScraper::fromVersion($logger, $version);
            } catch (Throwable $e) {
                $logger->critical((string)$e);
                return Command::FAILURE;
            }
            $versionReplacer->call($generator, $version);
            $custom = $generator->toArray();
            $postman = $generator->toPostman();
            $openapi = $generator->toOpenApi();

            $stubsDirectory = "$destination/tmp";
            try {
                $logger->info($version . ': Creating stubs...');
                $generator->toStubs($stubsDirectory, $input->getOption('namespace-prefix'));
                $logger->info($version . ': Compressing stubs...');
                $zip = new PharData("$destination/stubs/$filename.zip");
                $zip->buildFromDirectory($stubsDirectory);
            } catch (Exception $e) {
                $logger->critical(sprintf('%s: Could not create stubs: %s', $version, $e->getMessage()));
                return Command::FAILURE;
            } finally {
                // Never leave the temporary stubs behind, whether or not the archive was built.
                if (is_dir($stubsDirectory)) {
                    self::rrmdir($stubsDirectory);
                }
            }

            $logger->info($version . ': Saving schemas...');
            // Relative path, label used in messages, and a lazy encoder so each
            // export is only encoded right before it is written.
            $exports = [
                [
                    "custom/json/$filename.json",
                    'custom (JSON)',
                    static fn () => Encoder::toJson($custom, readable: $readable),
                ],
                [
                    "custom/yaml/$filename.yaml",
                    'custom (YAML)',
                    static fn () => Encoder::toYaml($custom),
                ],
                [
                    "postman/$filename.json",
                    'Postman',
                    static fn () => Encoder::toJson($postman, readable: $readable),
                ],
                [
                    "openapi/json/$filename.json",
                    'OpenAPI (JSON)',
                    static fn () => Encoder::toJson($openapi, readable: $readable),
                ],
                [
                    "openapi/yaml/$filename.yaml",
                    'OpenAPI (YAML)',
                    static fn () => Encoder::toYaml($openapi),
                ],
            ];
            foreach ($exports as [$relativePath, $label, $encode]) {
                $status = $this->saveFile(
                    $logger,
                    $output,
                    "$destination/$relativePath",
                    $encode(),
                    sprintf('v%s %s: ', $version, $label)
                );
                if ($status !== Command::SUCCESS) {
                    return Command::FAILURE;
                }
            }
            $logger->info($version . ': Done!');
        }

        $output->writeln('Done!');
        return Command::SUCCESS;
    }
}

View File

@ -5,7 +5,6 @@ namespace TgScraper\Commands;
use Exception;
use Psr\Log\LoggerInterface;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
@ -20,6 +19,8 @@ use Throwable;
class ExportSchemaCommand extends Command
{
use Common;
protected static $defaultName = 'app:export-schema';
protected function configure(): void
@ -64,17 +65,6 @@ class ExportSchemaCommand extends Command
);
}
private function saveFile(ConsoleLogger $logger, OutputInterface $output, string $destination, string $data): int
{
$result = file_put_contents($destination, $data);
if (false === $result) {
$logger->critical('Unable to save file to ' . $destination);
return Command::FAILURE;
}
$output->writeln('Done!');
return Command::SUCCESS;
}
protected function execute(InputInterface $input, OutputInterface $output): int
{
$logger = new ConsoleLogger($output);
@ -86,7 +76,8 @@ class ExportSchemaCommand extends Command
try {
$output->writeln('Fetching data for version...');
$generator = TgScraper::fromVersion($logger, $version);
} catch (Throwable) {
} catch (Throwable $e) {
$logger->critical((string)$e);
return Command::FAILURE;
}
$output->writeln('Exporting schema from data...');
@ -105,19 +96,19 @@ class ExportSchemaCommand extends Command
if ($input->getOption('openapi')) {
$data = $generator->toOpenApi();
if ($useYaml) {
return $this->saveFile($logger, $output, $destination, Encoder::toYaml($data, $inline, $indent, $options));
return $this->saveFile($logger, $output, $destination, Encoder::toYaml($data, $inline, $indent, $options), log: false);
}
return $this->saveFile($logger, $output, $destination, Encoder::toJson($data, $options | JSON_UNESCAPED_SLASHES, $readable));
return $this->saveFile($logger, $output, $destination, Encoder::toJson($data, $options | JSON_UNESCAPED_SLASHES, $readable), log: false);
}
if ($input->getOption('postman')) {
$data = $generator->toPostman();
return $this->saveFile($logger, $output, $destination, Encoder::toJson($data, $options, $readable));
return $this->saveFile($logger, $output, $destination, Encoder::toJson($data, $options, $readable), log: false);
}
$data = $generator->toArray();
if ($useYaml) {
return $this->saveFile($logger, $output, $destination, Encoder::toYaml($data, $inline, $indent, $options));
return $this->saveFile($logger, $output, $destination, Encoder::toYaml($data, $inline, $indent, $options), log: false);
}
return $this->saveFile($logger, $output, $destination, Encoder::toJson($data, $options, $readable));
return $this->saveFile($logger, $output, $destination, Encoder::toJson($data, $options, $readable), log: false);
}
}

View File

@ -80,6 +80,9 @@ class OpenApiGenerator
$schema['required'][] = $name;
}
$schema['properties'][$name] = self::parsePropertyTypes($field['types']);
if (!empty($field['default'] ?? null)) {
$schema['properties'][$name]['default'] = $field['default'];
}
}
return $schema;
}
@ -116,7 +119,7 @@ class OpenApiGenerator
if (preg_match('/Array<(.+)>/', $type, $matches) === 1) {
return [
'type' => 'array',
'items' => self::parsePropertyTypes(explode(',', $matches[1]))
'items' => self::parsePropertyTypes(explode('|', $matches[1]))
];
}
return [];

View File

@ -1,25 +1,23 @@
<?php
namespace TgScraper\Common;
use Composer\InstalledVersions;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use InvalidArgumentException;
use JetBrains\PhpStorm\ArrayShape;
use OutOfBoundsException;
use PHPHtmlParser\Dom;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\ContentLengthException;
use PHPHtmlParser\Exceptions\LogicalException;
use PHPHtmlParser\Exceptions\NotLoadedException;
use PHPHtmlParser\Exceptions\ParentNotFoundException;
use PHPHtmlParser\Exceptions\StrictException;
use Psr\Http\Client\ClientExceptionInterface;
use Psr\Log\LoggerInterface;
use RuntimeException;
use TgScraper\Parsers\Field;
use TgScraper\Parsers\ObjectDescription;
use TgScraper\Constants\Versions;
use Throwable;
use voku\helper\HtmlDomParser;
use voku\helper\SimpleHtmlDomInterface;
use voku\helper\SimpleHtmlDomNode;
use voku\helper\SimpleHtmlDomNodeInterface;
/**
* Class SchemaExtractor
@ -27,15 +25,6 @@ use Throwable;
*/
class SchemaExtractor
{
/**
* Additional methods with boolean return value.
*/
private const BOOL_RETURNS = [
'answerShippingQuery',
'answerPreCheckoutQuery'
];
/**
* @var string
*/
@ -44,11 +33,9 @@ class SchemaExtractor
/**
* SchemaExtractor constructor.
* @param LoggerInterface $logger
* @param Dom $dom
* @throws ChildNotFoundException
* @throws NotLoadedException
* @param HtmlDomParser $dom
*/
public function __construct(private LoggerInterface $logger, private Dom $dom)
public function __construct(private LoggerInterface $logger, private HtmlDomParser $dom)
{
$this->version = $this->parseVersion();
$this->logger->info('Bot API version: ' . $this->version);
@ -60,13 +47,17 @@ class SchemaExtractor
* @param string $version
* @return SchemaExtractor
* @throws OutOfBoundsException
* @throws Throwable
* @throws Exception
* @throws GuzzleException
*/
public static function fromVersion(LoggerInterface $logger, string $version = Versions::LATEST): SchemaExtractor
{
if (InstalledVersions::isInstalled('sysbot/tgscraper-cache') and class_exists('\TgScraper\Cache\CacheLoader')) {
$logger->info('Cache package detected, searching for a cached version.');
try {
/** @noinspection PhpFullyQualifiedNameUsageInspection */
/** @noinspection PhpUndefinedNamespaceInspection */
/** @psalm-suppress UndefinedClass */
$path = \TgScraper\Cache\CacheLoader::getCachedVersion($version);
$logger->info('Cached version found.');
return self::fromFile($logger, $path);
@ -83,20 +74,20 @@ class SchemaExtractor
* @param LoggerInterface $logger
* @param string $path
* @return SchemaExtractor
* @throws Throwable
* @throws InvalidArgumentException
* @throws RuntimeException
*/
public static function fromFile(LoggerInterface $logger, string $path): SchemaExtractor
{
$dom = new Dom;
if (!file_exists($path)) {
if (!file_exists($path) or is_dir($path)) {
throw new InvalidArgumentException('File not found');
}
$path = realpath($path);
try {
$logger->info(sprintf('Loading data from file "%s".', $path));
$dom->loadFromFile($path);
$dom = HtmlDomParser::file_get_html($path);
$logger->info('Data loaded.');
} catch (Throwable $e) {
} catch (RuntimeException $e) {
$logger->critical(sprintf('Unable to load data from "%s": %s', $path, $e->getMessage()));
throw $e;
}
@ -107,20 +98,15 @@ class SchemaExtractor
* @param LoggerInterface $logger
* @param string $url
* @return SchemaExtractor
* @throws ChildNotFoundException
* @throws CircularException
* @throws ClientExceptionInterface
* @throws ContentLengthException
* @throws LogicalException
* @throws StrictException
* @throws NotLoadedException
* @throws GuzzleException
*/
public static function fromUrl(LoggerInterface $logger, string $url): SchemaExtractor
{
$dom = new Dom;
$client = new Client();
try {
$dom->loadFromURL($url);
} catch (Throwable $e) {
$html = $client->get($url)->getBody();
$dom = HtmlDomParser::str_get_html((string)$html);
} catch (GuzzleException $e) {
$logger->critical(sprintf('Unable to load data from URL "%s": %s', $url, $e->getMessage()));
throw $e;
}
@ -129,57 +115,56 @@ class SchemaExtractor
}
/**
* @throws ParentNotFoundException
* @throws ChildNotFoundException
* @param SimpleHtmlDomInterface $node
* @return array{description: string, table: ?SimpleHtmlDomNodeInterface, extended_by: string[]}
*/
#[ArrayShape(['description' => "string", 'table' => "mixed", 'extended_by' => "array"])]
private static function parseNode(Dom\Node\AbstractNode $node): ?array
private static function parseNode(SimpleHtmlDomInterface $node): array
{
$description = '';
$table = null;
$extendedBy = [];
$tag = '';
$sibling = $node;
while (!str_starts_with($tag, 'h')) {
$sibling = $sibling->nextSibling();
$tag = $sibling?->tag?->name();
if (empty($node->text()) or empty($tag) or $tag == 'text') {
while (!str_starts_with($tag ?? '', 'h')) {
$sibling = $sibling?->nextSibling();
$tag = $sibling?->tag;
if (empty($node->text()) or empty($tag) or $tag == 'text' or empty($sibling)) {
continue;
} elseif ($tag == 'p') {
$description .= PHP_EOL . $sibling->innerHtml();
} elseif ($tag == 'ul') {
$items = $sibling->find('li');
/* @var Dom\Node\AbstractNode $item */
foreach ($items as $item) {
$extendedBy[] = $item->innerText;
}
break;
} elseif ($tag == 'table') {
$table = $sibling->find('tbody')->find('tr');
break;
}
switch ($tag) {
case 'p':
$description .= PHP_EOL . $sibling->innerHtml();
break;
case 'ul':
$items = $sibling->findMulti('li');
foreach ($items as $item) {
$extendedBy[] = $item->text();
}
break 2;
case 'table':
/** @var SimpleHtmlDomNodeInterface $table */
$table = $sibling->findOne('tbody')->findMulti('tr');
break 2;
}
}
return ['description' => $description, 'table' => $table, 'extended_by' => $extendedBy];
}
/**
* @throws ChildNotFoundException
* @throws NotLoadedException
* @return string
*/
private function parseVersion(): string
{
/** @var Dom\Node\AbstractNode $element */
$element = $this->dom->find('h3')[0];
$element = $this->dom->findOne('h3');
$tag = '';
while ($tag != 'p') {
try {
$element = $element->nextSibling();
} catch (ChildNotFoundException | ParentNotFoundException) {
continue;
}
$tag = $element->tag->name();
while ($tag != 'p' and !empty($element)) {
$element = $element->nextSibling();
$tag = $element?->tag;
}
$versionNumbers = explode('.', str_replace('Bot API ', '', $element->innerText));
if (empty($element)) {
return '1.0.0';
}
$versionNumbers = explode('.', str_replace('Bot API ', '', $element->text()));
return sprintf(
'%s.%s.%s',
$versionNumbers[0] ?? '1',
@ -189,22 +174,26 @@ class SchemaExtractor
}
/**
* @return array
* @throws Throwable
* @return string
*/
public function getVersion(): string
{
return $this->version;
}
/**
* @return array{version: string, methods: array, types: array}
* @throws Exception
*/
#[ArrayShape(['version' => "string", 'methods' => "array", 'types' => "array"])]
public function extract(): array
{
try {
$elements = $this->dom->find('h4');
} catch (Throwable $e) {
$this->logger->critical(sprintf('Unable to parse data: %s', $e->getMessage()));
throw $e;
$elements = $this->dom->findMultiOrFalse('h4');
if (false === $elements) {
throw new Exception('Unable to fetch required DOM nodes');
}
$data = ['version' => $this->version];
/* @var Dom\Node\AbstractNode $element */
$data = ['version' => $this->version, 'methods' => [], 'types' => []];
foreach ($elements as $element) {
if (!str_contains($name = $element->text, ' ')) {
if (!str_contains($name = $element->text(), ' ')) {
$isMethod = lcfirst($name) == $name;
$path = $isMethod ? 'methods' : 'types';
['description' => $description, 'table' => $table, 'extended_by' => $extendedBy] = self::parseNode(
@ -225,21 +214,15 @@ class SchemaExtractor
/**
* @param string $name
* @param string $description
* @param Dom\Node\Collection|null $unparsedFields
* @param SimpleHtmlDomNodeInterface|null $unparsedFields
* @param array $extendedBy
* @param bool $isMethod
* @return array
* @throws ChildNotFoundException
* @throws CircularException
* @throws ContentLengthException
* @throws LogicalException
* @throws NotLoadedException
* @throws StrictException
*/
private static function generateElement(
string $name,
string $description,
?Dom\Node\Collection $unparsedFields,
?SimpleHtmlDomNodeInterface $unparsedFields,
array $extendedBy,
bool $isMethod
): array {
@ -250,10 +233,8 @@ class SchemaExtractor
'fields' => $fields
];
if ($isMethod) {
$returnTypes = self::parseReturnTypes($description);
if (empty($returnTypes) and in_array($name, self::BOOL_RETURNS)) {
$returnTypes[] = 'bool';
}
$description = new ObjectDescription($description);
$returnTypes = $description->getTypes();
$result['return_types'] = $returnTypes;
return $result;
}
@ -262,126 +243,33 @@ class SchemaExtractor
}
/**
* @param Dom\Node\Collection|null $fields
* @param SimpleHtmlDomNodeInterface|null $fields
* @param bool $isMethod
* @return array
*/
private static function parseFields(?Dom\Node\Collection $fields, bool $isMethod): array
private static function parseFields(?SimpleHtmlDomNodeInterface $fields, bool $isMethod): array
{
$parsedFields = [];
$fields = $fields ?? [];
$fields ??= [];
/** @var SimpleHtmlDomInterface $field */
foreach ($fields as $field) {
/* @var Dom\Node\AbstractNode $fieldData */
$fieldData = $field->find('td');
$name = $fieldData[0]->text;
/** @var SimpleHtmlDomNode $fieldData */
$fieldData = $field->findMulti('td');
$name = $fieldData[0]->text();
if (empty($name)) {
continue;
}
$parsedData = [
'name' => $name,
'type' => strip_tags($fieldData[1]->innerHtml)
];
$parsedData['types'] = self::parseFieldTypes($parsedData['type']);
unset($parsedData['type']);
$types = $fieldData[1]->text();
if ($isMethod) {
$parsedData['optional'] = $fieldData[2]->text != 'Yes';
$parsedData['description'] = htmlspecialchars_decode(
strip_tags($fieldData[3]->innerHtml ?? $fieldData[3]->text ?? ''),
ENT_QUOTES
);
$optional = $fieldData[2]->text() != 'Yes';
$description = $fieldData[3]->innerHtml();
} else {
$description = htmlspecialchars_decode(strip_tags($fieldData[2]->innerHtml), ENT_QUOTES);
$parsedData['optional'] = str_starts_with($description, 'Optional.');
$parsedData['description'] = $description;
$description = $fieldData[2]->innerHtml();
$optional = str_starts_with($fieldData[2]->text(), 'Optional.');
}
$parsedFields[] = $parsedData;
$field = new Field($name, $types, $optional, $description);
$parsedFields[] = $field->toArray();
}
return $parsedFields;
}
/**
* @param string $rawType
* @return array
*/
private static function parseFieldTypes(string $rawType): array
{
$types = [];
foreach (explode(' or ', $rawType) as $rawOrType) {
if (stripos($rawOrType, 'array') === 0) {
$types[] = str_replace(' and', ',', $rawOrType);
continue;
}
foreach (explode(' and ', $rawOrType) as $unparsedType) {
$types[] = $unparsedType;
}
}
$parsedTypes = [];
foreach ($types as $type) {
$type = trim(str_replace(['number', 'of'], '', $type));
$multiplesCount = substr_count(strtolower($type), 'array');
$parsedType = trim(
str_replace(
['Array', 'Integer', 'String', 'Boolean', 'Float', 'True'],
['', 'int', 'string', 'bool', 'float', 'bool'],
$type
)
);
for ($i = 0; $i < $multiplesCount; $i++) {
$parsedType = sprintf('Array<%s>', $parsedType);
}
$parsedTypes[] = $parsedType;
}
return $parsedTypes;
}
/**
* @param string $description
* @return array
* @throws ChildNotFoundException
* @throws CircularException
* @throws NotLoadedException
* @throws StrictException
* @throws ContentLengthException
* @throws LogicalException
* @noinspection PhpUndefinedFieldInspection
*/
private static function parseReturnTypes(string $description): array
{
$returnTypes = [];
$phrases = explode('.', $description);
$phrases = array_filter(
$phrases,
function ($phrase) {
return (false !== stripos($phrase, 'returns') or false !== stripos($phrase, 'is returned'));
}
);
foreach ($phrases as $phrase) {
$dom = new Dom;
$dom->loadStr($phrase);
$a = $dom->find('a');
$em = $dom->find('em');
foreach ($a as $element) {
if ($element->text == 'Messages') {
$returnTypes[] = 'Array<Message>';
continue;
}
$multiplesCount = substr_count(strtolower($phrase), 'array');
$returnType = $element->text;
for ($i = 0; $i < $multiplesCount; $i++) {
$returnType = sprintf('Array<%s>', $returnType);
}
$returnTypes[] = $returnType;
}
foreach ($em as $element) {
if (in_array($element->text, ['False', 'force', 'Array'])) {
continue;
}
$type = str_replace(['True', 'Int', 'String'], ['bool', 'int', 'string'], $element->text);
$returnTypes[] = $type;
}
}
return $returnTypes;
}
}
}

View File

@ -1,12 +1,12 @@
<?php
/** @noinspection PhpArrayShapeAttributeCanBeAddedInspection */
/** @noinspection PhpInternalEntityUsedInspection */
namespace TgScraper\Common;
use InvalidArgumentException;
use JetBrains\PhpStorm\ArrayShape;
use Nette\PhpGenerator\Helpers;
use Nette\PhpGenerator\PhpFile;
use Nette\PhpGenerator\PhpNamespace;
@ -38,6 +38,7 @@ class StubCreator
* StubCreator constructor.
* @param array $schema
* @param string $namespace
* @throws InvalidArgumentException
*/
public function __construct(private array $schema, string $namespace = '')
{
@ -59,7 +60,7 @@ class StubCreator
/**
* Builds the abstract and the extended class lists.
*/
private function getExtendedTypes()
private function getExtendedTypes(): void
{
foreach ($this->schema['types'] as $type) {
if (!empty($type['extended_by'])) {
@ -85,10 +86,6 @@ class StubCreator
* @param PhpNamespace $phpNamespace
* @return array
*/
#[ArrayShape([
'types' => "string",
'comments' => "string"
])]
private function parseFieldTypes(array $fieldTypes, PhpNamespace $phpNamespace): array
{
$types = [];
@ -116,10 +113,6 @@ class StubCreator
* @param PhpNamespace $phpNamespace
* @return array
*/
#[ArrayShape([
'types' => "string",
'comments' => "string"
])]
private function parseApiFieldTypes(array $apiTypes, PhpNamespace $phpNamespace): array
{
$types = [];
@ -128,6 +121,17 @@ class StubCreator
$comments[] = $apiType;
if (str_starts_with($apiType, 'Array')) {
$types[] = 'array';
$text = $apiType;
while (preg_match('/Array<(.+)>/', $text, $matches) === 1) {
$text = $matches[1];
}
$subTypes = explode('|', $text);
foreach ($subTypes as $subType) {
if (ucfirst($subType) == $subType) {
$subType = $this->namespace . '\\Types\\' . $subType;
$phpNamespace->addUse($subType);
}
}
continue;
}
if (ucfirst($apiType) == $apiType) {
@ -136,7 +140,7 @@ class StubCreator
}
$types[] = $apiType;
}
$comments = empty($comments) ? '' : sprintf('@var %s', implode('|', $comments));
$comments = empty($comments) ? '' : sprintf('@param %s', implode('|', $comments));
return [
'types' => implode('|', $types),
'comments' => $comments
@ -147,10 +151,6 @@ class StubCreator
* @param string $namespace
* @return PhpFile[]
*/
#[ArrayShape([
'Response' => "\Nette\PhpGenerator\PhpFile",
'TypeInterface' => "\Nette\PhpGenerator\ClassType"
])]
private function generateDefaultTypes(string $namespace): array
{
$interfaceFile = new PhpFile;
@ -158,25 +158,30 @@ class StubCreator
$interfaceNamespace->addInterface('TypeInterface');
$responseFile = new PhpFile;
$responseNamespace = $responseFile->addNamespace($namespace);
$response = $responseNamespace->addClass('Response')
->setType('class');
$responseNamespace->addUse('stdClass');
$response = $responseNamespace->addClass('Response');
$response->addProperty('ok')
->setPublic()
->setType(Type::BOOL);
$response->addProperty('result')
->setPublic()
->setType(Type::MIXED)
->setNullable(true)
->setType(sprintf('stdClass|%s\\TypeInterface|array|int|string|bool', $namespace))
->setNullable()
->setValue(null);
$response->addProperty('errorCode')
->setPublic()
->setType(Type::INT)
->setNullable(true)
->setNullable()
->setValue(null);
$response->addProperty('description')
->setPublic()
->setType(Type::STRING)
->setNullable(true)
->setNullable()
->setValue(null);
$response->addProperty('parameters')
->setPublic()
->setType(sprintf('stdClass|%s\\ResponseParameters', $namespace))
->setNullable()
->setValue(null);
$response->addImplement($namespace . '\\TypeInterface');
return [
@ -195,13 +200,12 @@ class StubCreator
foreach ($this->schema['types'] as $type) {
$file = new PhpFile;
$phpNamespace = $file->addNamespace($namespace);
$typeClass = $phpNamespace->addClass($type['name'])
->setType('class');
$typeClass = $phpNamespace->addClass($type['name']);
if (in_array($type['name'], $this->abstractClasses)) {
$typeClass->setAbstract();
}
if (array_key_exists($type['name'], $this->extendedClasses)) {
$typeClass->addExtend($namespace . '\\' . $this->extendedClasses[$type['name']]);
$typeClass->setExtends($namespace . '\\' . $this->extendedClasses[$type['name']]);
} else {
$typeClass->addImplement($namespace . '\\TypeInterface');
}
@ -214,12 +218,19 @@ class StubCreator
$typeProperty = $typeClass->addProperty($fieldName)
->setPublic()
->setType($fieldTypes);
$default = $field['default'] ?? null;
if (!empty($default)) {
$typeProperty->setValue($default);
}
if ($field['optional']) {
$typeProperty->setNullable(true)
->setValue(null);
$typeProperty->setNullable();
if (!$typeProperty->isInitialized()) {
$typeProperty->setValue(null);
}
$fieldComments .= '|null';
}
if (!empty($fieldComments)) {
$fieldComments .= ' ' . $field['description'];
$typeProperty->addComment($fieldComments);
}
}
@ -229,16 +240,15 @@ class StubCreator
}
/**
* @return string
* @return PhpFile
*/
private function generateApi(): string
private function generateApi(): PhpFile
{
$file = new PhpFile;
$file->addComment('@noinspection PhpUnused');
$file->addComment('@noinspection PhpUnusedParameterInspection');
$phpNamespace = $file->addNamespace($this->namespace);
$apiClass = $phpNamespace->addClass('API')
->setTrait();
$apiClass = $phpNamespace->addTrait('API');
$sendRequest = $apiClass->addMethod('sendRequest')
->setPublic()
->setAbstract()
@ -252,6 +262,7 @@ class StubCreator
->setPublic()
->addBody('$args = get_defined_vars();')
->addBody('return $this->sendRequest(__FUNCTION__, $args);');
$function->addComment($method['description']);
$fields = $method['fields'];
usort(
$fields,
@ -260,28 +271,37 @@ class StubCreator
}
);
foreach ($fields as $field) {
$types = $this->parseApiFieldTypes($field['types'], $phpNamespace)['types'];
['types' => $types, 'comments' => $comment] = $this->parseApiFieldTypes($field['types'], $phpNamespace);
$fieldName = self::toCamelCase($field['name']);
$parameter = $function->addParameter($fieldName)
->setType($types);
if ($field['optional']) {
$parameter->setNullable()
->setDefaultValue(null);
$default = $field['default'] ?? null;
if (!empty($default) and (!is_string($default) or lcfirst($default) == $default)) {
$parameter->setDefaultValue($default);
}
if ($field['optional']) {
$parameter->setNullable();
if (!$parameter->hasDefaultValue()) {
$parameter->setDefaultValue(null);
}
$comment .= '|null';
}
$comment .= sprintf(' $%s %s', $fieldName, $field['description']);
$function->addComment($comment);
}
$returnTypes = $this->parseApiFieldTypes($method['return_types'], $phpNamespace)['types'];
['types' => $returnTypes, 'comments' => $returnComment] = $this->parseApiFieldTypes(
$method['return_types'],
$phpNamespace
);
$function->setReturnType($returnTypes);
$function->addComment(str_replace('param', 'return', $returnComment));
}
return $file;
}
/**
* @return array
* @return array{types: PhpFile[], api: PhpFile}
*/
#[ArrayShape([
'types' => "\Nette\PhpGenerator\PhpFile[]",
'api' => "string"
])]
public function generateCode(): array
{
return [

View File

@ -1,12 +1,9 @@
<?php
namespace TgScraper\Constants;
class Versions
{
public const V100 = '1.0.0';
public const V110 = '1.1.0';
public const V140 = '1.4.0';
@ -43,8 +40,10 @@ class Versions
public const V530 = '5.3.0';
public const V540 = '5.4.0';
public const V550 = '5.5.0';
public const V560 = '5.6.0';
public const V570 = '5.7.0';
public const LATEST = 'latest';
public const STABLE = self::V550;
public const STABLE = self::V570;
public const URLS = [
self::V100 => 'https://web.archive.org/web/20150714025308id_/https://core.telegram.org/bots/api/',
@ -83,6 +82,8 @@ class Versions
self::V530 => 'https://web.archive.org/web/20210626142851id_/https://core.telegram.org/bots/api',
self::V540 => 'https://web.archive.org/web/20211105152638id_/https://core.telegram.org/bots/api',
self::V550 => 'https://web.archive.org/web/20211211002657id_/https://core.telegram.org/bots/api',
self::V560 => 'https://web.archive.org/web/20220105131529id_/https://core.telegram.org/bots/api',
self::V570 => 'https://web.archive.org/web/20220206103922id_/https://core.telegram.org/bots/api',
self::LATEST => 'https://core.telegram.org/bots/api'
];
@ -101,5 +102,4 @@ class Versions
$version = self::getVersionFromText($text);
return self::URLS[$version] ?? self::URLS[self::LATEST];
}
}
}

165
src/Parsers/Field.php Normal file
View File

@ -0,0 +1,165 @@
<?php
namespace TgScraper\Parsers;
use JetBrains\PhpStorm\ArrayShape;
/**
 * A single named field together with its parsed types, its optional flag,
 * its description and (when one can be determined) its default value.
 *
 * @package TgScraper\Parsers
 */
class Field
{
    /**
     * Type names as written in the source text, mapped to the names used in the output.
     */
    public const TYPES = [
        'Integer' => 'int',
        'Float' => 'float',
        'String' => 'string',
        'Boolean' => 'bool',
        'True' => 'bool',
        'False' => 'bool'
    ];

    /**
     * @var string Field name, stored exactly as passed to the constructor.
     */
    private string $name;

    /**
     * @var array Parsed type strings, one entry per " or "-separated alternative.
     */
    private array $types;

    /**
     * @var FieldDescription Wrapper built from the raw description string.
     */
    private FieldDescription $description;

    /**
     * @var bool Whether the field is optional.
     */
    private bool $optional;

    /**
     * @var mixed Default value; left uninitialized until a literal True/False type
     *            or the description provides one.
     */
    private mixed $defaultValue;

    /**
     * @param string $name Field name.
     * @param string $types Human-readable type list (e.g. "Integer or String").
     * @param bool $optional Whether the field is optional.
     * @param string $description Raw description, wrapped in a FieldDescription.
     */
    public function __construct(string $name, string $types, bool $optional, string $description)
    {
        $this->name = $name;
        // Parsing the types may already fix the default value (literal True/False types).
        $this->types = $this->parseTypesString($types);
        $this->optional = $optional;
        $this->description = new FieldDescription($description);
    }

    /**
     * Converts one type name into its output form.
     *
     * The literal types "True" and "False" map to bool and additionally record
     * the matching boolean as the field's default value.
     *
     * @param string $type
     * @return string
     */
    private function parseTypeString(string $type): string
    {
        $literal = match ($type) {
            'True' => true,
            'False' => false,
            default => null,
        };
        if ($literal !== null) {
            $this->defaultValue = $literal;
            return self::TYPES['Boolean'];
        }
        // Drop the word "number" (as in "Float number") before mapping the names.
        $cleaned = trim(str_replace('number', '', $type));
        $mapped = str_replace(array_keys(self::TYPES), array_values(self::TYPES), $cleaned);
        return trim($mapped);
    }

    /**
     * Splits a type list into its alternatives and parses each of them.
     *
     * Alternatives are separated by " or "; every leading "Array of" wraps the
     * alternative in one Array<...> level, and comma/"and"-separated members are
     * joined with "|".
     *
     * @param string $text
     * @return array
     */
    private function parseTypesString(string $text): array
    {
        $parsed = [];
        foreach (explode(' or ', $text) as $alternative) {
            $alternative = trim(str_replace(' and', ',', $alternative));
            $depth = 0;
            while (stripos($alternative, 'array of') === 0) {
                // Skip "array of" plus the space that follows it.
                $alternative = substr($alternative, 9);
                $depth++;
            }
            $members = array_map(
                fn(string $member): string => $this->parseTypeString($member),
                explode(',', $alternative)
            );
            $parsed[] = str_repeat('Array<', $depth) . implode('|', $members) . str_repeat('>', $depth);
        }
        return $parsed;
    }

    /**
     * @return string
     */
    public function getName(): string
    {
        return $this->name;
    }

    /**
     * @return array
     */
    public function getTypes(): array
    {
        return $this->types;
    }

    /**
     * @return bool
     */
    public function isOptional(): bool
    {
        return $this->optional;
    }

    /**
     * Returns the default value, asking the description for one whenever none
     * is stored yet (uninitialized or null).
     *
     * @return mixed
     */
    public function getDefaultValue(): mixed
    {
        return $this->defaultValue ??= $this->description->getDefaultValue();
    }

    /**
     * Exports the field as an array; the "default" key is only present when a
     * default value is known.
     *
     * @return array
     */
    #[ArrayShape([
        'name' => "string",
        'types' => "array",
        'optional' => "bool",
        'description' => "string",
        'default' => "mixed"
    ])] public function toArray(): array
    {
        $exported = [
            'name' => $this->name,
            'types' => $this->types,
            'optional' => $this->optional,
            'description' => (string)$this->description,
        ];
        $default = $this->getDefaultValue();
        if ($default === null) {
            return $exported;
        }
        $exported['default'] = $default;
        return $exported;
    }
}

View File

@ -0,0 +1,67 @@
<?php
namespace TgScraper\Parsers;
use voku\helper\HtmlDomParser;
class FieldDescription
{
private HtmlDomParser $dom;
public function __construct(string $description)
{
$this->dom = HtmlDomParser::str_get_html($description);
foreach ($this->dom->find('.emoji') as $emoji) {
$emoji->outerhtml .= $emoji->getAttribute('alt');
}
}
public function __toString()
{
return htmlspecialchars_decode($this->dom->text(), ENT_QUOTES);
}
public function getDefaultValue(): mixed
{
$description = (string)$this;
if (stripos($description, 'must be') !== false) {
$text = explode('must be ', $this->dom->html())[1] ?? '';
if (!empty($text)) {
$text = explode(' ', $text)[0];
$dom = HtmlDomParser::str_get_html($text);
$element = $dom->findOneOrFalse('em');
if ($element !== false) {
return $element->text();
}
}
}
$offset = stripos($description, 'defaults to');
if ($offset === false) {
return null;
}
$description = substr($description, $offset + 12);
$parts = explode(' ', $description, 2);
$value = $parts[0];
if (str_ends_with($value, '.') or str_ends_with($value, ',')) {
$value = substr($value, 0, -1);
}
if (str_starts_with($value, '“') and str_ends_with($value, '”')) {
return str_replace(['“', '”'], ['', ''], $value);
}
if (is_numeric($value)) {
return (int)$value;
}
if (strtolower($value) == 'true') {
return true;
}
if (strtolower($value) == 'false') {
return false;
}
if ($value === ucfirst($value)) {
return $value;
}
return null;
}
}

View File

@ -0,0 +1,76 @@
<?php
namespace TgScraper\Parsers;
use voku\helper\HtmlDomParser;
/**
 * Extracts the return types mentioned in a description text.
 *
 * @package TgScraper\Parsers
 */
class ObjectDescription
{
    /**
     * @var array Return types found in the description, in order of appearance.
     */
    private array $types;

    /**
     * @param string $description Description markup to scan.
     */
    public function __construct(string $description)
    {
        $this->types = self::parseReturnTypes($description);
    }

    /**
     * Scans every "."-separated phrase that contains "returns" or "is returned"
     * and collects types from its <a> and <em> elements.
     *
     * Link texts are wrapped in one Array<...> level per occurrence of "array"
     * in the phrase (the link text "Messages" always becomes Array<Message>);
     * emphasised texts are mapped to scalar names, skipping "False", "force"
     * and "Array".
     *
     * @param string $description
     * @return array
     */
    private static function parseReturnTypes(string $description): array
    {
        $found = [];
        foreach (explode('.', $description) as $sentence) {
            $mentionsReturn = stripos($sentence, 'returns') !== false
                || stripos($sentence, 'is returned') !== false;
            if (!$mentionsReturn) {
                continue;
            }
            $dom = HtmlDomParser::str_get_html($sentence);
            $links = $dom->findMulti('a');
            $emphasised = $dom->findMulti('em');
            // The nesting depth depends on the phrase only, not on the individual link.
            $depth = substr_count(strtolower($sentence), 'array');
            foreach ($links as $link) {
                $label = $link->text();
                if ($label == 'Messages') {
                    $found[] = 'Array<Message>';
                    continue;
                }
                $found[] = str_repeat('Array<', $depth) . $label . str_repeat('>', $depth);
            }
            foreach ($emphasised as $node) {
                $label = $node->text();
                if (in_array($label, ['False', 'force', 'Array'])) {
                    continue;
                }
                $found[] = str_replace(['True', 'Int', 'String'], ['bool', 'int', 'string'], $label);
            }
        }
        return $found;
    }

    /**
     * @return array
     */
    public function getTypes(): array
    {
        return $this->types;
    }
}

View File

@ -3,17 +3,10 @@
namespace TgScraper;
use Exception;
use GuzzleHttp\Exception\GuzzleException;
use InvalidArgumentException;
use JetBrains\PhpStorm\ArrayShape;
use JsonException;
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\ContentLengthException;
use PHPHtmlParser\Exceptions\LogicalException;
use PHPHtmlParser\Exceptions\NotLoadedException;
use PHPHtmlParser\Exceptions\ParentNotFoundException;
use PHPHtmlParser\Exceptions\StrictException;
use Psr\Http\Client\ClientExceptionInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\Yaml\Yaml;
use TgScraper\Common\OpenApiGenerator;
@ -65,15 +58,7 @@ class TgScraper
/**
* @param LoggerInterface $logger
* @param string $url
* @return static
* @throws ChildNotFoundException
* @throws CircularException
* @throws ClientExceptionInterface
* @throws ContentLengthException
* @throws LogicalException
* @throws NotLoadedException
* @throws ParentNotFoundException
* @throws StrictException
* @return self
* @throws Throwable
*/
public static function fromUrl(LoggerInterface $logger, string $url): self
@ -86,16 +71,9 @@ class TgScraper
/**
* @param LoggerInterface $logger
* @param string $version
* @return static
* @throws ChildNotFoundException
* @throws CircularException
* @throws ClientExceptionInterface
* @throws ContentLengthException
* @throws LogicalException
* @throws NotLoadedException
* @throws ParentNotFoundException
* @throws StrictException
* @throws Throwable
* @return self
* @throws Exception
* @throws GuzzleException
*/
public static function fromVersion(LoggerInterface $logger, string $version = Versions::LATEST): self
{
@ -243,7 +221,7 @@ class TgScraper
$formData = [];
if (!empty($method['fields'])) {
foreach ($method['fields'] as $field) {
$formData[] = [
$data = [
'key' => $field['name'],
'disabled' => $field['optional'],
'description' => sprintf(
@ -253,6 +231,11 @@ class TgScraper
),
'type' => 'text'
];
$default = $field['default'] ?? null;
if (!empty($default)) {
$data['value'] = (string)$default;
}
$formData[] = $data;
}
}
$result['item'][] = [