ROOTPLOIT

File: //opt/gsutil/third_party/charset_normalizer/tests/test_detect_legacy.py
import unittest
from charset_normalizer.legacy import detect


class TestDetectLegacy(unittest.TestCase):

    def test_detect_dict_keys(self):

        r = detect(
            (u'\uFEFF' + '我没有埋怨，磋砣的只是一些时间。').encode('gb18030')
        )

        with self.subTest('encoding key present'):
            self.assertIn(
                'encoding',
                r.keys()
            )

        with self.subTest('language key present'):
            self.assertIn(
                'language',
                r.keys()
            )

        with self.subTest('confidence key present'):
            self.assertIn(
                'confidence',
                r.keys()
            )

    def test_detect_dict_value_type(self):

        r = detect(
            '我没有埋怨，磋砣的只是一些时间。'.encode('utf_8')
        )

        with self.subTest('encoding instance of str'):
            self.assertIsInstance(
                r['encoding'],
                str
            )

        with self.subTest('language instance of str'):
            self.assertIsInstance(
                r['language'],
                str
            )

        with self.subTest('confidence instance of float'):
            self.assertIsInstance(
                r['confidence'],
                float
            )

    def test_detect_dict_value(self):
        r = detect(
            '我没有埋怨，磋砣的只是一些时间。'.encode('utf_32')
        )

        with self.subTest('encoding is equal to utf_32'):
            self.assertEqual(
                r['encoding'],
                'UTF-32'
            )

    def test_utf8_sig_not_striped(self):
        r = detect(
            "Hello World".encode('utf-8-sig')
        )

        with self.subTest("Verify that UTF-8-SIG is returned when using legacy detect"):
            self.assertEqual(
                r['encoding'],
                "UTF-8-SIG"
            )