#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""hash files using the selected algorithm hashing the uncompressed form by decompressing compressed files."""

#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# this script was created to help illustrate uses of zopen()
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# the hash algorithm is determined by the command name or, failing that, by the first argument
# create executables (or links) whose names each contain
# the name of the one algorithm preferred for that command
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# beware of files that uncompress into huge content
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
import hashlib
import os
from ftrgen import ftrgen
from os import environ,sep
from sys import argv,stderr,stdin
from zopen import zopen

# control strings used when writing to the terminal
CR = '\r'
NL = '\n'
ETEOL = '\x1b[K' # erase to end of line

# knobs for reading and for the running size display
readsize = 1 << 24 # number of bytes requested per read() call
oversize = 1 << 48 # value the running byte total starts from (not a real size)
whensize = 1 << 25 # the running size is shown again after each further run of this many bytes

# error threshold, optionally overridden by the ERROR_MAX environment variable
error_max = 255 # default
var = 'ERROR_MAX'
try:
    val = environ[var]
except KeyError:
    pass # variable is not set: the default stays in effect
else:
    try:
        error_max = int(val)
    except ValueError:
        exit(f'bad int value for env var {var!r}: {val!r}')

#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# function to handle error messages, print them and count them
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
errorcount = 0 # number of messages reported through error() so far

def error(msg):
    """Write msg to stderr (flushed immediately) and add one to the module-level errorcount."""
    global errorcount
    print(msg, file=stderr, flush=True)
    errorcount = errorcount + 1

#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# function to find which hash algorithm name occurs in a str
# returns the algorithm name as a str when a single algorithm can be picked
# otherwise returns the number of matching algorithm names as an int
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
def which_algorithm(word):
    """Find the hash algorithm named by, or contained in, word.

    word is compared against hashlib.algorithms_available.

    Returns the algorithm name (str) when:
      - word is exactly an available algorithm name, or
      - exactly one available name occurs inside word, or
      - several names occur inside word and the longest one contains all of
        the others (for example a name such as 'sha512_256' also contains
        'sha512'; the longer, more specific name is the one selected).

    Otherwise returns the number of names found inside word (int): 0 when
    nothing matches, 2 or more when the matches are genuinely ambiguous.
    """
    available = hashlib.algorithms_available
    if word in available:
        return word
    found = {alg for alg in available if alg in word}
    if len(found) == 1:
        return found.pop()
    if found:
        # two distinct names of equal length can never contain each other, so a tie
        # for the longest name always fails the check below and stays ambiguous
        longest = max(found, key=len)
        if all(alg in longest for alg in found):
            return longest
    return len(found)

#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# get command name and from it determine which hash algorithm was intended for this command
# if the command name has none then try the first argument as the hash algorithm
# hash algorithm names such as: md5 or sha256
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
exe = argv.pop(0)
if not argv:
    # no arguments at all: list every available algorithm name and exit (status 0)
    exit(print(NL.join(sorted(hashlib.algorithms_available))))
cmd = exe.rsplit(sep,1)[-1] # command name without any leading directory part
algorithm = which_algorithm(cmd)
ext = ''
if algorithm == 0:
    # the command name contains no algorithm name, so the first argument has to supply it
    # (argv is known to be non-empty here because of the check above)
    ext = ' or first argument'
    if argv[0].startswith('--alg'):
        exit(print(NL.join(sorted(hashlib.algorithms_available))))
    algorithm = which_algorithm(argv.pop(0))
# an int here means no single algorithm could be picked; this must also be checked when
# the command name itself matched several algorithms, otherwise the int would reach
# hashlib.new() later and fail with a TypeError
if isinstance(algorithm,int):
    if algorithm == 0:
        algorithm = 'no'
    exit(f'{algorithm} available hash algorithms match command name {cmd!r}{ext}')

#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# gather all file names
# file names are read from stdin if not given in argv
# this code does not zhash stdin
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
count = len(argv)
names = []
try:
    # names come from the remaining arguments when there are any, otherwise from the lines of stdin
    for given in (argv if count else stdin):
        # splitlines() removes line endings and splits an entry that holds several lines
        for name in given.splitlines():
            if os.path.isdir(name):
                # a directory contributes every path yielded by ftrgen() that is
                # a regular file and not a symbolic link
                found = [path for ascend,depth,path in ftrgen(name)
                         if not os.path.islink(path) and os.path.isfile(path)]
            elif os.path.isfile(name):
                found = [name]
            else:
                found = [] # anything else is skipped without a message
            # a leading './' is dropped from each collected name
            names.extend(path[2:] if path.startswith('./') else path for path in found)
except KeyboardInterrupt:
    exit(CR+'ouch!'+ETEOL+CR)

#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# check all files for open failures before any are hashed (plain open() is used here, not zopen())
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
try:
    for name in names:
        # open and immediately close each file; only the failures matter here
        try:
            with open(name,'rb'):
                pass
        except FileNotFoundError:
            error(f'file not found: {name!r}')
        except PermissionError:
            error(f'access denied: {name!r}')
        except IsADirectoryError:
            error(f'will not hash directory: {name!r}')
        except OSError: # everything else
            error(f'unknown error opening file: {name!r}')

except KeyboardInterrupt:
    exit(CR+'Ouch!'+ETEOL+CR)

# give up before hashing anything when more errors were counted than allowed
# (errorcount is a plain int, so it is formatted directly rather than indexed)
if errorcount > error_max:
    exit(f'aborting due to {errorcount} error{"s"[errorcount==1:]} (max {error_max!r})')

#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
# hash all files
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
try:
    for name in names:
        try:
            # total starts at oversize so that hex(total)[3:] always has the same number of digits
            total = oversize
            limit = oversize + whensize
            with zopen(name,'rb') as file: # decompression will be determined in zopen by extension(s) of file name
                hash_object = hashlib.new(algorithm)
                while True:
                    if total >= limit:
                        # progress line on stderr: running size in hex plus the name, cut to 135 characters
                        print(f'{hex(total)[3:]} {name}'[:135],end=ETEOL+CR,file=stderr,flush=True)
                        limit += whensize
                    data = file.read(readsize)
                    if not data: # EOF
                        break
                    hash_object.update(data)
                    total += len(data)
        except (EOFError,OSError) as exc:
            # erase any progress line, then report the failure instead of dropping the
            # file from the output without a word; the remaining files are still hashed
            print(end=CR+ETEOL+CR,file=stderr,flush=True)
            error(f'cannot hash {name!r}: {exc}')
            continue
        print(end=CR+ETEOL+CR,file=stderr,flush=True) # erase the progress line
        # NOTE(review): hexdigest() is called without a length; hashlib's SHAKE algorithms
        # require one - confirm whether those algorithms need to be supported here
        print(hash_object.hexdigest(),'*'+name,flush=True)
except KeyboardInterrupt:
    exit(CR+'OUCH!'+ETEOL+CR)
print(end=CR,file=stderr,flush=True)