# coding: utf-8

"""Pseudonymize data.

Read a text file row by row and replace every row with the base85-encoded
scrypt hash of its content.  The salt and the scrypt parameters are written
as the first line of the output.
"""

import argparse
import base64
import sys
from secrets import token_bytes
from typing import Optional, Sequence, TextIO

try:
    from hashlib import scrypt
except ImportError:
    # ``hashlib`` has no ``scrypt`` here: fall back to the ``scrypt`` package.
    # The import is aliased so that the builtin ``hash`` is not shadowed.
    from scrypt import hash as _scrypt_hash

    def scrypt(password, salt, n, r, p):
        """Call ``scrypt.hash`` through the keyword names used by this script."""
        return _scrypt_hash(password, salt, n, r, p)


MIN_SALT_LENGTH = 16

# Recommended scrypt parameters
# see http://www.tarsnap.com/scrypt/scrypt.pdf
DEFAULT_SCRYPT_N = 1 << 14  # noqa: WPS432
DEFAULT_SCRYPT_R = 8
DEFAULT_SCRYPT_P = 1


class ValidationException(Exception):
    """Argument validation exception."""


def _decode_salt(in_str: str) -> bytes:
    """Decode the base64 command-line value of ``--salt`` into raw bytes.

    A named function (rather than a lambda) lets argparse mention a readable
    converter name when the value cannot be decoded.
    """
    return base64.b64decode(in_str)


arg_parser = argparse.ArgumentParser(
    description='Apply scrypt to every row of txt file. Salt, N, r and p parameters will be written as a first line',
    epilog=(
        'Please use hashes only for reconciliation. ' +
        'Do not store them more than it is needed for reconciliation. ' +
        'Please contact your Data Privacy Officer for more information'
    ),
)
arg_parser.add_argument(
    '-n',
    type=int,
    default=DEFAULT_SCRYPT_N,
    help='cost factor (default: {0})'.format(DEFAULT_SCRYPT_N),
)
arg_parser.add_argument(
    '-r',
    type=int,
    default=DEFAULT_SCRYPT_R,
    help='block-size factor (default: {0})'.format(DEFAULT_SCRYPT_R),
)
arg_parser.add_argument(
    '-p',
    type=int,
    default=DEFAULT_SCRYPT_P,
    help='parallelization factor (default: {0})'.format(DEFAULT_SCRYPT_P),
)
arg_parser.add_argument(
    '-s',
    '--salt',
    # A fresh random salt is used when none is supplied on the command line.
    default=token_bytes(32),
    type=_decode_salt,
    help='base64 encoded salt for scrypt (minimum: 16 bytes)',
)
arg_parser.add_argument(
    'infile',
    nargs='?',
    type=argparse.FileType('r'),
    default=sys.stdin,
    help='text file to be hashed line by line (default: stdin)',
)
arg_parser.add_argument(
    'outfile',
    nargs='?',
    type=argparse.FileType('w'),
    default=sys.stdout,
    help='file to write hashes (default: stdout)',
)


def pseudonymize(
    infile: TextIO,
    outfile: TextIO,
    salt: bytes,
    n: int = DEFAULT_SCRYPT_N,
    r: int = DEFAULT_SCRYPT_R,
    p: int = DEFAULT_SCRYPT_P,
) -> None:
    """Hash every row of ``infile`` with scrypt and write the result to ``outfile``.

    The first line written is ``<base64 salt> <n> <r> <p>``; each following
    line is the base85-encoded scrypt hash of one input row (the trailing
    newline of the row is stripped before hashing, the rest is UTF-8 encoded).

    Args:
        infile: iterable text stream providing the rows to hash.
        outfile: text stream receiving the header line and the hashes.
        salt: scrypt salt, at least ``MIN_SALT_LENGTH`` bytes.
        n: scrypt cost factor.
        r: scrypt block-size factor.
        p: scrypt parallelization factor.

    Raises:
        ValidationException: if ``salt`` is shorter than ``MIN_SALT_LENGTH``.
    """
    if len(salt) < MIN_SALT_LENGTH:
        raise ValidationException(
            'Salt shall be at least {0} bytes long'.format(MIN_SALT_LENGTH),
        )

    # The header goes to ``outfile`` (not unconditionally to stdout) so the
    # parameters stay together with the hashes they were used for.
    print(
        base64.b64encode(salt).decode('ascii'),
        str(n),
        str(r),
        str(p),
        file=outfile,
    )
    for row in infile:
        digest = scrypt(
            row.rstrip('\n').encode('utf-8'),
            salt=salt,
            n=n,
            r=r,
            p=p,
        )
        outfile.write(base64.b85encode(digest).decode('ascii') + '\n')


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse command-line arguments and run the pseudonymization.

    Args:
        argv: argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Raises:
        ValidationException: if the supplied salt is too short.
    """
    args = arg_parser.parse_args(argv)
    # Validation happens inside the ``with`` so both files are closed even
    # when the salt is rejected.
    with args.infile, args.outfile:
        pseudonymize(
            args.infile,
            args.outfile,
            salt=args.salt,
            n=args.n,
            r=args.r,
            p=args.p,
        )


if __name__ == '__main__':
    main()