Initial version
parent
e71152d14a
commit
97512a397a
|
@ -0,0 +1,96 @@
|
|||
# coding: utf-8
|
||||
"""Pseudonymize data."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from csv import DictReader
|
||||
|
||||
# Prefer the standard-library scrypt; fall back to the third-party ``scrypt``
# package when this interpreter's hashlib does not export it.
try:
    from hashlib import scrypt
except ImportError:
    # Import under an alias so the builtin ``hash`` is not shadowed.
    from scrypt import hash as _scrypt_hash

    def scrypt(password, salt, n, r, p):
        """Derive a key with the third-party ``scrypt`` package.

        Adapter that lets the rest of the script call the fallback with the
        same ``salt=``, ``n=``, ``r=``, ``p=`` keywords it uses for
        ``hashlib.scrypt``.

        Args:
            password: bytes to hash.
            salt: salt bytes.
            n: cost factor.
            r: block-size factor.
            p: parallelization factor.

        Returns:
            The value returned by ``scrypt.hash`` for these arguments.
        """
        return _scrypt_hash(password, salt, n, r, p)
|
||||
|
||||
|
||||
# Shortest salt, in bytes, that the script accepts.
MIN_SALT_LENGTH = 16
|
||||
|
||||
# Command-line parser. The epilog reminds users how the produced hashes are
# meant to be handled.
arg_parser = argparse.ArgumentParser(
    epilog=(
        'Please use hashes only for reconciliation. '
        'Do not store them more than it is needed for reconciliation. '
        'Please contact your Data Privacy Officer for more information'
    ),
    description='Apply scrypt to every row of csv file',
)
|
||||
|
||||
# Default scrypt work factors (recommended parameters);
# see http://www.tarsnap.com/scrypt/scrypt.pdf
DEFAULT_SCRYPT_P = 1
DEFAULT_SCRYPT_R = 8
DEFAULT_SCRYPT_N = 2 ** 14  # noqa: WPS432
|
||||
|
||||
# Register the three integer scrypt tuning options. Each one falls back to
# its module-level default, which is also shown in the help text.
for _flag, _default, _help_template in (
    ('-n', DEFAULT_SCRYPT_N, 'cost factor (default: {0})'),
    ('-r', DEFAULT_SCRYPT_R, 'block-size factor (default: {0})'),
    ('-p', DEFAULT_SCRYPT_P, 'parallelization factor (default: {0})'),
):
    arg_parser.add_argument(
        _flag,
        type=int,
        default=_default,
        help=_help_template.format(_default),
    )
|
||||
# The salt is a required positional argument. argparse never falls back to a
# ``default`` for a required positional, so none is declared here.
arg_parser.add_argument(
    'salt',
    # The command-line text is encoded to UTF-8 bytes; the encoded value is
    # what gets length-checked and passed on as the scrypt salt.
    type=lambda in_str: in_str.encode('utf-8'),
    help='salt for scrypt (minimum: 16 bytes)',
)
|
||||
# Optional positional streams: the CSV is read from ``infile`` and hashes are
# written to ``outfile``. When omitted they default to stdin and stdout.
arg_parser.add_argument(
    'infile',
    help='Comma Separated Values file (CSV) with header (default: stdin)',
    type=argparse.FileType('r'),
    nargs='?',
    default=sys.stdin,
)
arg_parser.add_argument(
    'outfile',
    help='file to write hashes (default: stdout)',
    type=argparse.FileType('w'),
    nargs='?',
    default=sys.stdout,
)
|
||||
# Parse the command line; the resulting namespace is read by the salt check
# and the hashing loop below.
args = arg_parser.parse_args()
|
||||
|
||||
|
||||
class ValidationException(Exception):
    """Raised when a command-line argument fails validation."""
|
||||
|
||||
|
||||
# Reject salts shorter than the minimum. A salt of exactly MIN_SALT_LENGTH
# bytes is accepted, so the message says "at least" and takes the number from
# the constant rather than repeating it.
if len(args.salt) < MIN_SALT_LENGTH:
    raise ValidationException(
        'Salt shall be at least {0} bytes long'.format(MIN_SALT_LENGTH),
    )
|
||||
|
||||
# Write one hex-encoded scrypt digest per CSV data row, one digest per line.
# Both streams are closed when the block exits.
with args.infile, args.outfile:
    for row in DictReader(args.infile):
        # Canonical text for the row: JSON with sorted keys, lower-cased.
        canonical_row = json.dumps(row, sort_keys=True).lower()
        digest = scrypt(
            canonical_row.encode('utf-8'),
            salt=args.salt,
            n=args.n,
            r=args.r,
            p=args.p,
        )
        args.outfile.writelines((digest.hex(), '\n'))
|
Loading…
Reference in New Issue