Initial version

master
Антон Касимов 2021-08-09 17:44:03 +03:00
parent e71152d14a
commit 97512a397a
Signed by: toxa
GPG Key ID: CC3C1E3EA2534D0C
1 changed file with 96 additions and 0 deletions

96
pfizerhash.py Normal file
View File

@@ -0,0 +1,96 @@
# coding: utf-8
"""Pseudonymize data."""
import argparse
import json
import sys
from csv import DictReader
try:
    from hashlib import scrypt as _hashlib_scrypt
except ImportError:
    # This interpreter's hashlib has no scrypt; fall back to the third-party
    # `scrypt` package. Imported under an alias so that the builtin hash()
    # is not shadowed for the rest of the module.
    from scrypt import hash as _scrypt_hash

    def scrypt(password, salt, n, r, p):
        """Derive a key from *password* with scrypt via the `scrypt` package.

        Args:
            password: bytes to hash.
            salt: salt bytes.
            n: cost factor.
            r: block-size factor.
            p: parallelization factor.

        Returns:
            The derived key as bytes.
        """
        return _scrypt_hash(password, salt, n, r, p)
else:
    # Memory cap OpenSSL applies when no explicit maxmem is given.
    _OPENSSL_DEFAULT_MAXMEM = 32 * 1024 * 1024

    def scrypt(password, salt, n, r, p):
        """Derive a key from *password* with scrypt via hashlib.

        Args:
            password: bytes to hash.
            salt: salt bytes.
            n: cost factor.
            r: block-size factor.
            p: parallelization factor.

        Returns:
            The derived key as bytes.

        Raises:
            ValueError: if hashlib/OpenSSL rejects the parameters.
        """
        # OpenSSL refuses to run when the working set, 128 * r * (n + p + 2)
        # bytes, exceeds maxmem. With the 32 MiB default, any cost above the
        # script's default parameters fails with "memory limit exceeded",
        # so raise the cap to what the requested parameters need.
        required = 128 * r * (n + p + 2)
        return _hashlib_scrypt(
            password,
            salt=salt,
            n=n,
            r=r,
            p=p,
            maxmem=max(_OPENSSL_DEFAULT_MAXMEM, required),
        )
# Shortest salt, in bytes, that the script accepts.
MIN_SALT_LENGTH = 16

# Command-line interface: scrypt tuning options followed by the salt and the
# optional input/output files.
arg_parser = argparse.ArgumentParser(
    description='Apply scrypt to every row of csv file',
    epilog=(
        'Please use hashes only for reconciliation. '
        'Do not store them more than it is needed for reconciliation. '
        'Please contact your Data Privacy Officer for more information'
    ),
)

# Recommended scrypt parameters
# see http://www.tarsnap.com/scrypt/scrypt.pdf
DEFAULT_SCRYPT_N = 1 << 14  # noqa: WPS432
DEFAULT_SCRYPT_R = 8
DEFAULT_SCRYPT_P = 1

arg_parser.add_argument(
    '-n',
    type=int,
    default=DEFAULT_SCRYPT_N,
    help='cost factor (default: {0})'.format(DEFAULT_SCRYPT_N),
)
arg_parser.add_argument(
    '-r',
    type=int,
    default=DEFAULT_SCRYPT_R,
    help='block-size factor (default: {0})'.format(DEFAULT_SCRYPT_R),
)
arg_parser.add_argument(
    '-p',
    type=int,
    default=DEFAULT_SCRYPT_P,
    help='parallelization factor (default: {0})'.format(DEFAULT_SCRYPT_P),
)
# The salt is a required positional, so it takes no default: argparse never
# applies a default to a required positional.
arg_parser.add_argument(
    'salt',
    # scrypt works on bytes; encode the command-line text once, here.
    type=lambda in_str: in_str.encode('utf-8'),
    help='salt for scrypt (minimum: {0} bytes)'.format(MIN_SALT_LENGTH),
)
arg_parser.add_argument(
    'infile',
    nargs='?',
    type=argparse.FileType('r'),
    default=sys.stdin,
    help='Comma Separated Values file (CSV) with header (default: stdin)',
)
arg_parser.add_argument(
    'outfile',
    nargs='?',
    type=argparse.FileType('w'),
    default=sys.stdout,
    help='file to write hashes (default: stdout)',
)
class ValidationException(Exception):
    """Argument validation exception."""


# Parse the command line, then enforce the minimum salt length before any
# row is hashed.
# NOTE(review): argparse.FileType opens infile/outfile during parsing, so an
# explicitly named outfile already exists (truncated) if validation fails.
args = arg_parser.parse_args()

if len(args.salt) < MIN_SALT_LENGTH:
    # A salt of exactly MIN_SALT_LENGTH bytes is accepted, so the message
    # states the bound as "at least".
    raise ValidationException(
        'Salt shall be at least {0} bytes long'.format(MIN_SALT_LENGTH),
    )
# Hash every CSV record and write one hex digest per line, in input order.
# Both files are closed when the block exits.
with args.infile, args.outfile:
    for record in DictReader(args.infile):
        # Canonical form of a record: JSON with sorted keys, lower-cased.
        # json.dumps keeps its default ASCII-escaped output, so lower()
        # only folds ASCII letters. Any change to this form changes every
        # digest, so it must stay exactly as is.
        canonical = json.dumps(record, sort_keys=True).lower()
        digest = scrypt(
            canonical.encode('utf-8'),
            salt=args.salt,
            n=args.n,
            r=args.r,
            p=args.p,
        )
        args.outfile.write(digest.hex() + '\n')