Process txt files instead of CSV
Generate a salt and write it, together with the n, r and p parameters, as the first line of the outfile.
master
parent
2234afe560
commit
f1466e3a4e
50
userhash.py
50
userhash.py
|
@ -2,15 +2,14 @@
|
|||
"""Pseudonymize data."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import base64
|
||||
import sys
|
||||
from csv import DictReader
|
||||
from secrets import token_bytes
|
||||
|
||||
try:
|
||||
from hashlib import scrypt
|
||||
except ImportError:
|
||||
from scrypt import hash
|
||||
|
||||
def scrypt(password, salt, n, r, p):
|
||||
return hash(password, salt, n, r, p)
|
||||
|
||||
|
@ -18,7 +17,7 @@ except ImportError:
|
|||
MIN_SALT_LENGTH = 16
|
||||
|
||||
arg_parser = argparse.ArgumentParser(
|
||||
description='Apply scrypt to every row of csv file',
|
||||
description='Apply scrypt to every row of txt file. Salt, N, r and p parameters will be written as a first line',
|
||||
epilog=(
|
||||
'Please use hashes only for reconciliation. ' +
|
||||
'Do not store them more than it is needed for reconciliation. ' +
|
||||
|
@ -51,17 +50,18 @@ arg_parser.add_argument(
|
|||
help='parallelization factor (default: {0})'.format(DEFAULT_SCRYPT_P),
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
'salt',
|
||||
default=sys.stdin,
|
||||
type=lambda in_str: in_str.encode('utf-8'),
|
||||
help='salt for scrypt (minimum: 16 bytes)',
|
||||
'-s',
|
||||
'--salt',
|
||||
default=token_bytes(32),
|
||||
type=lambda in_str: base64.b64decode(in_str),
|
||||
help='base64 encoded salt for scrypt (minimum: 16 bytes)',
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
'infile',
|
||||
nargs='?',
|
||||
type=argparse.FileType('r'),
|
||||
default=sys.stdin,
|
||||
help='Comma Separated Values file (CSV) with header (default: stdin)',
|
||||
help='text file to be hashed line by line (default: stdin)',
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
'outfile',
|
||||
|
@ -81,16 +81,24 @@ if len(args.salt) < MIN_SALT_LENGTH:
|
|||
raise ValidationException('Salt shall be longer than 16 bytes')
|
||||
|
||||
with args.infile:
|
||||
reader = DictReader(args.infile)
|
||||
with args.outfile:
|
||||
for row in reader:
|
||||
args.outfile.writelines((
|
||||
scrypt(
|
||||
json.dumps(row, sort_keys=True).lower().encode('utf-8'),
|
||||
salt=args.salt,
|
||||
n=args.n,
|
||||
r=args.r,
|
||||
p=args.p,
|
||||
).hex(),
|
||||
'\n',
|
||||
))
|
||||
args.outfile.write(
|
||||
' '.join((
|
||||
base64.b64encode(args.salt).decode('ascii'),
|
||||
str(args.n),
|
||||
str(args.r),
|
||||
str(args.p),
|
||||
)) + '\n',
|
||||
)
|
||||
for row in args.infile:
|
||||
args.outfile.write(
|
||||
base64.b85encode(
|
||||
scrypt(
|
||||
row.rstrip('\n').encode('utf-8'),
|
||||
salt=args.salt,
|
||||
n=args.n,
|
||||
r=args.r,
|
||||
p=args.p,
|
||||
),
|
||||
).decode('ascii') + '\n',
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue