101 lines
3.1 KiB
Python
101 lines
3.1 KiB
Python
"""Example for reading data from encoded text files.
|
|
|
|
Demonstrates how to handle setting the proper encoding for
|
|
UTF-8, UTF-16-LE, and UTF-16-BE with the ability to easily
|
|
expand to support checking other file magic values/signatures.
|
|
|
|
Example Usage:
|
|
|
|
``$ python open_files.py INPUT_FILE``
|
|
|
|
References:
|
|
|
|
* https://docs.python.org/3/library/io.html
|
|
|
|
|
|
Open files with proper encoding
|
|
===============================
|
|
|
|
This first function shows an example of opening a file after checking for a
|
|
byte-order mark (BOM). While this method could be expanded to check for a file's
|
|
magic value/file signature, this low-tech method will help with parsing a
|
|
collection of files that may be UTF-8, UTF-16-LE, and UTF-16-BE, three very
|
|
common text file encodings. Feel free to build on this and share.
|
|
|
|
.. literalinclude:: ../sections/section_01/open_files.py
|
|
:pyobject: open_file
|
|
|
|
Docstring References
|
|
====================
|
|
"""
|
|
|
|
from io import open
|
|
import os
|
|
|
|
"""
|
|
Copyright 2019 Chapin Bryce
|
|
|
|
Permission is hereby granted, free of charge, to any person
|
|
obtaining a copy of this software and associated documentation
|
|
files (the "Software"), to deal in the Software without
|
|
restriction, including without limitation the rights to use, copy,
|
|
modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
DEALINGS IN THE SOFTWARE.
|
|
"""
|
|
|
|
# Module metadata. The command-line help text below reads __desc__,
# __author__ and __date__.
__author__ = "Chapin Bryce"
__date__ = 2019_11_03  # integer 20191103, grouped as YYYY_MM_DD
__license__ = "MIT Copyright 2019 Chapin Bryce"
__desc__ = "Sample script to read encoded text files."

# Reference links for this sample.
# NOTE(review): the script imports io and os, not csv -- confirm that the
# csv link is intended.
__docs__ = [
    "https://docs.python.org/3/library/csv.html",
    "https://docs.python.org/3/library/os.html",
]
|
|
|
|
|
|
def open_file(input_file):
    """Open an encoded text file and print its contents.

    The first two bytes of the file are compared against the UTF-16
    byte-order marks (BOM). A file starting with a UTF-16 BOM is decoded
    as UTF-16; any other file is decoded as UTF-8 (a leading UTF-8 BOM,
    if present, is skipped). Every line is printed with its leading and
    trailing whitespace removed.

    Arguments:
        input_file (str): Path to file to open

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content does not match the selected
            encoding.
    """
    # Function-scope import keeps the block self-contained.
    import codecs

    # Map raw BOM bytes to the codec used for decoding. Extend this table
    # to recognize further signatures. The generic 'utf-16' codec reads the
    # BOM itself to pick the byte order and drops it from the decoded text,
    # whereas 'utf-16-le'/'utf-16-be' would leave U+FEFF in the first line.
    bom_encodings = {
        codecs.BOM_UTF16_LE: 'utf-16',  # bytes FF FE
        codecs.BOM_UTF16_BE: 'utf-16',  # bytes FE FF
    }

    # Probe the raw bytes; the context manager closes the handle again.
    with open(input_file, 'rb') as test_encoding:
        bom = test_encoding.read(2)

    # 'utf-8-sig' decodes plain UTF-8 unchanged and skips an optional
    # UTF-8 BOM (EF BB BF) at the start of the file.
    file_encoding = bom_encodings.get(bom, 'utf-8-sig')

    with open(input_file, 'r', encoding=file_encoding) as fopen:
        for raw_line in fopen:
            print(raw_line.strip())
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument (the path of
    # the text file) and prints that file's contents via open_file().
    import argparse

    cli_settings = {
        "description": __desc__,
        "formatter_class": argparse.ArgumentDefaultsHelpFormatter,
        "epilog": f"Built by {__author__}, v.{__date__}",
    }
    parser = argparse.ArgumentParser(**cli_settings)
    parser.add_argument('INPUT_FILE', help="Text file to read")
    args = parser.parse_args()

    open_file(args.INPUT_FILE)
|