gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / requests / packages / chardet / chardetect.py @ 564
History | View | Annotate | Download (2.45 KB)
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings

Example::

    % chardetect somefile someotherfile
    somefile: windows-1252 with confidence 0.5
    someotherfile: ascii with confidence 1.0

If no paths are provided, it takes its input from stdin.

"""
|
15 |
|
16 |
from __future__ import absolute_import, print_function, unicode_literals |
17 |
|
18 |
import argparse |
19 |
import sys |
20 |
from io import open |
21 |
|
22 |
from chardet import __version__ |
23 |
from chardet.universaldetector import UniversalDetector |
24 |
|
25 |
|
26 |
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    Each item of ``lines`` is fed to a ``UniversalDetector``; feeding stops
    as soon as the detector reports that it is done, so the rest of the
    input is not read needlessly.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when an
              encoding was detected, otherwise ``'<name>: no result'``.
    :rtype: str
    """
    u = UniversalDetector()
    for line in lines:
        u.feed(line)
        # Once the detector has made up its mind, further input cannot
        # change the result, so stop consuming the iterable.
        if u.done:
            break
    # close() finalises the detection and populates ``u.result``.
    u.close()
    result = u.result
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)
46 |
|
47 |
|
48 |
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    Parses the arguments, then prints one description line (as produced by
    ``description_of``) for every input file. When no input paths are given,
    standard input is read instead.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # description_of() expects bytes. On Python 3 ``sys.stdin`` is a text
    # stream, so use its underlying binary buffer when one exists; Python 2's
    # stdin has no ``buffer`` attribute and is used as-is.
    binary_stdin = getattr(sys.stdin, 'buffer', sys.stdin)
    # Streams that belong to the interpreter and must not be closed by us.
    std_streams = (sys.stdin, binary_stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[binary_stdin])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        try:
            print(description_of(f, f.name))
        finally:
            # argparse.FileType opened these files for us; release each one
            # once it has been processed, but leave standard input alone.
            if f not in std_streams:
                f.close()
77 |
|
78 |
|
79 |
if __name__ == '__main__':
    # Run the command-line interface only when executed as a script,
    # not when this module is imported.
    main()