This commit is contained in:
pukkandan 2022-07-15 21:44:07 +05:30
parent a904a7f8c6
commit 88f60feb32
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
3 changed files with 15 additions and 43 deletions

View File

@ -1161,14 +1161,11 @@ # Save all videos under YouTube directory in your home directory
You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
### Specifying encoding of config files ### Config file encoding
By default, config files are read in the encoding from system locale. The config files are decoded according to the UTF BOM if present, and in the encoding from system locale otherwise.
If you saved your config file in a different encoding than that, you may write `# coding: ENCODING` to the beginning of the file. (e.g. `# coding: shift-jis`)
There must not be any characters before that, including spaces. If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM.
If you have BOM enabled, it will be used instead.
### Authentication with `.netrc` file ### Authentication with `.netrc` file

View File

@ -1831,24 +1831,16 @@ def test_determine_file_encoding(self):
self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4)) self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4))
self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2)) self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2))
self.assertEqual(determine_file_encoding(b'# -*- coding: cp932 -*-'), ('cp932', 0)) self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2))
self.assertEqual(determine_file_encoding(b'# -*- coding: cp932 -*-\n'), ('cp932', 0))
self.assertEqual(determine_file_encoding(b'# -*- coding: cp932 -*-\r\n'), ('cp932', 0))
self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0)) self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0))
self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0)) self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0))
self.assertEqual(determine_file_encoding(b'# vi: set fileencoding=cp932'), ('cp932', 0)) self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0))
self.assertEqual(determine_file_encoding(b'# vi: set fileencoding=cp932\n'), ('cp932', 0)) self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0))
self.assertEqual(determine_file_encoding(b'# vi: set fileencoding=cp932\r\n'), ('cp932', 0))
self.assertEqual(determine_file_encoding(b'# vi: set fileencoding=cp932,euc-jp\r\n'), ('cp932', 0))
self.assertEqual(determine_file_encoding( self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
b'\0\0\0#\0\0\0 \0\0\0c\0\0\0o\0\0\0d\0\0\0i\0\0\0n\0\0\0g\0\0\0:\0\0\0 \0\0\0u\0\0\0t\0\0\0f\0\0\0-\0\0\x003\0\0\x002\0\0\0-\0\0\0b\0\0\0e'), self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
('utf-32-be', 0))
self.assertEqual(determine_file_encoding(
b'#\0 \0c\0o\0d\0i\0n\0g\0:\0 \0u\0t\0f\0-\x001\x006\0-\0l\0e\0'),
('utf-16-le', 0))
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -3485,6 +3485,7 @@ def age_restricted(content_limit, age_limit):
return age_limit < content_limit return age_limit < content_limit
# List of known byte-order-marks (BOM)
BOMS = [ BOMS = [
(b'\xef\xbb\xbf', 'utf-8'), (b'\xef\xbb\xbf', 'utf-8'),
(b'\x00\x00\xfe\xff', 'utf-32-be'), (b'\x00\x00\xfe\xff', 'utf-32-be'),
@ -3492,7 +3493,6 @@ def age_restricted(content_limit, age_limit):
(b'\xff\xfe', 'utf-16-le'), (b'\xff\xfe', 'utf-16-le'),
(b'\xfe\xff', 'utf-16-be'), (b'\xfe\xff', 'utf-16-be'),
] ]
""" List of known byte-order-marks (BOM) """
def is_html(first_bytes): def is_html(first_bytes):
@ -5398,37 +5398,20 @@ def read_stdin(what):
def determine_file_encoding(data):
    """
    Detect the text encoding used

    A byte-order-mark listed in BOMS at the very start of the data takes
    priority over any declaration. Otherwise a "# coding: ENCODING" comment
    is looked for at the very beginning of the data. Editor-style variants
    such as "# -*- coding: ENCODING -*-" or "# vi: set fileencoding=ENCODING"
    are not matched by the pattern used here.

    @param data     Bytes from the start of the file
                    NOTE(review): an earlier docstring mentioned the first
                    512 bytes - confirm against the caller
    @returns        (encoding, bytes to skip)
                    encoding is None when neither a BOM nor a declaration is
                    found. bytes to skip is the length of the BOM, or 0 when
                    no BOM matched; a declaration line is never skipped
    """
    # BOM marks are given priority over declarations
    for bom, enc in BOMS:
        if data.startswith(bom):
            return enc, len(bom)

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    data = data.replace(b'\0', b'')
    # re.match anchors at offset 0, so the declaration must be the very first
    # thing in the data; with (?m) the trailing "$" stops at the end of that line
    mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
    return mobj.group(1).decode() if mobj else None, 0
class Config: class Config: