# userhash/userhash.py
#
# 103 lines
# 2.6 KiB
# Python

# coding: utf-8
"""Pseudonymize data."""
import argparse
import base64
import sys
from secrets import token_bytes
try:
    # Preferred implementation: scrypt from the standard library.
    from hashlib import scrypt
except ImportError:
    # hashlib does not expose scrypt here, so fall back to the third-party
    # ``scrypt`` package. Its ``hash`` function is imported under a private
    # alias so that the builtin ``hash`` is not shadowed module-wide.
    from scrypt import hash as _scrypt_hash

    def scrypt(password, salt, n, r, p):
        """Derive a key using the third-party ``scrypt`` package.

        The parameter names match the keywords the rest of this script
        passes (``salt``, ``n``, ``r``, ``p``), so either implementation
        can be called the same way.

        Args:
            password: bytes to be hashed.
            salt: salt bytes.
            n: cost factor.
            r: block-size factor.
            p: parallelization factor.

        Returns:
            Whatever the fallback ``hash`` function returns for these
            arguments (the derived key).
        """
        return _scrypt_hash(password, salt, n, r, p)
# Shortest salt, in bytes, advertised by the ``--salt`` help text.
MIN_SALT_LENGTH = 16

# Recommended scrypt parameters
# see http://www.tarsnap.com/scrypt/scrypt.pdf
DEFAULT_SCRYPT_N = 1 << 14  # noqa: WPS432
DEFAULT_SCRYPT_R = 8
DEFAULT_SCRYPT_P = 1

# Command-line interface: three integer scrypt knobs, an optional salt and
# optional input/output files.
arg_parser = argparse.ArgumentParser(
    description=(
        'Apply scrypt to every row of txt file. '
        'Salt, N, r and p parameters will be written as a first line'
    ),
    epilog=' '.join((
        'Please use hashes only for reconciliation.',
        'Do not store them more than it is needed for reconciliation.',
        'Please contact your Data Privacy Officer for more information',
    )),
)

# The three scrypt parameters differ only in flag, default and wording,
# so they are registered from a small table (order: -n, -r, -p).
_HELP_WITH_DEFAULT = '{0} (default: {1})'
for _flag, _fallback, _meaning in (
    ('-n', DEFAULT_SCRYPT_N, 'cost factor'),
    ('-r', DEFAULT_SCRYPT_R, 'block-size factor'),
    ('-p', DEFAULT_SCRYPT_P, 'parallelization factor'),
):
    arg_parser.add_argument(
        _flag,
        type=int,
        default=_fallback,
        help=_HELP_WITH_DEFAULT.format(_meaning, _fallback),
    )

# A salt given on the command line is base64-decoded to bytes; when the
# option is omitted, 32 random bytes generated at start-up are used.
arg_parser.add_argument(
    '-s',
    '--salt',
    type=lambda encoded: base64.b64decode(encoded),
    default=token_bytes(32),
    help='base64 encoded salt for scrypt (minimum: 16 bytes)',
)

# Optional positional files; the standard streams are used when omitted.
arg_parser.add_argument(
    'infile',
    nargs='?',
    type=argparse.FileType('r'),
    default=sys.stdin,
    help='text file to be hashed line by line (default: stdin)',
)
arg_parser.add_argument(
    'outfile',
    nargs='?',
    type=argparse.FileType('w'),
    default=sys.stdout,
    help='file to write hashes (default: stdout)',
)
# Parse the command line once; the rest of the script reads its settings
# from ``args``.
args = arg_parser.parse_args()


class ValidationException(Exception):
    """Argument validation exception."""


# Salts shorter than MIN_SALT_LENGTH bytes are rejected; a salt of exactly
# MIN_SALT_LENGTH bytes is accepted, which is what the message states.
if len(args.salt) < MIN_SALT_LENGTH:
    raise ValidationException(
        'Salt shall be at least {0} bytes long'.format(MIN_SALT_LENGTH),
    )
# Both streams are closed when the block exits, also on error.
with args.infile, args.outfile:
    # Header line: base64-encoded salt followed by N, r and p, separated
    # by spaces.
    # NOTE(review): the header is printed to stdout, not to
    # ``args.outfile``; with an explicit outfile it is therefore not stored
    # next to the hashes. Confirm that this is intended.
    print(
        base64.b64encode(args.salt).decode('ascii'),
        str(args.n),
        str(args.r),
        str(args.p),
    )
    for row in args.infile:
        # Only trailing newline characters are removed before hashing; any
        # other whitespace in the row is part of the hashed value.
        digest = scrypt(
            row.rstrip('\n').encode('utf-8'),
            salt=args.salt,
            n=args.n,
            r=args.r,
            p=args.p,
        )
        # One base85-encoded hash per output line.
        args.outfile.write(base64.b85encode(digest).decode('ascii') + '\n')