Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / requests / packages / chardet / chardetect.py @ 564

History | View | Annotate | Download (2.45 KB)

1
#!/usr/bin/env python
2
"""
Script which takes one or more file paths and reports on their detected
encodings.

Example::

    % chardetect somefile someotherfile
    somefile: windows-1252 with confidence 0.5
    someotherfile: ascii with confidence 1.0

If no paths are provided, it takes its input from stdin.

"""
15

    
16
from __future__ import absolute_import, print_function, unicode_literals
17

    
18
import argparse
19
import sys
20
from io import open
21

    
22
from chardet import __version__
23
from chardet.universaldetector import UniversalDetector
24

    
25

    
26
def description_of(lines, name='stdin'):
    """
    Build a one-line, human-readable summary of the likely encoding of
    a file or list of byte strings.

    Every item of ``lines`` is fed, in order, to a fresh
    ``UniversalDetector``; the detector is then closed and its ``result``
    mapping is turned into the summary.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines; used as the prefix
                 of the returned summary.
    :type name: str
    :returns: ``"<name>: <encoding> with confidence <confidence>"`` when the
              detector reports an encoding, otherwise ``"<name>: no result"``.
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(chunk)
    detector.close()

    verdict = detector.result
    encoding = verdict['encoding']

    # Guard clause: the detector reported no (truthy) encoding.
    if not encoding:
        return '{0}: no result'.format(name)

    confidence = verdict['confidence']
    return '{0}: {1} with confidence {2}'.format(name, encoding, confidence)
46

    
47

    
48
def main(argv=None):
    """
    Handle command line arguments and report the encoding of each input.

    Every positional argument is opened in binary mode by ``argparse`` and
    handed to ``description_of``; the returned description is printed to
    stdout.  When no paths are given, standard input is analysed instead.
    Files opened for the positional arguments are closed once they have
    been processed; standard input is left open.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # The detector is fed bytes.  On Python 3 ``sys.stdin`` is a text stream,
    # so use its underlying binary buffer when it has one; where there is no
    # ``buffer`` attribute (e.g. Python 2) ``sys.stdin`` itself is used.
    stdin = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their "
                    "detected encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[stdin])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        try:
            if f.isatty():
                print("You are running chardetect interactively. Press " +
                      "CTRL-D twice at the start of a blank line to signal the " +
                      "end of your input. If you want help, run chardetect " +
                      "--help\n", file=sys.stderr)
            print(description_of(f, f.name))
        finally:
            # Release handles opened by argparse.FileType, but never close
            # the process-wide standard input stream (or its buffer).
            if f is not stdin and f is not sys.stdin:
                f.close()
77

    
78

    
79
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()