Process txt files instead of CSV

Generate a salt and write it, together with the n, r and p parameters,
as the first line of the outfile.
master
Антон Касимов 2021-09-09 14:10:09 +03:00
parent 2234afe560
commit f1466e3a4e
Signed by: toxa
GPG Key ID: CC3C1E3EA2534D0C
1 changed files with 29 additions and 21 deletions

View File

@ -2,15 +2,14 @@
"""Pseudonymize data."""
import argparse
import json
import base64
import sys
from csv import DictReader
from secrets import token_bytes
try:
    # Prefer the standard-library implementation when this build provides it.
    from hashlib import scrypt
except ImportError:
    # Fall back to the third-party ``scrypt`` package. Its ``hash`` function
    # is imported under a private alias so that the builtin ``hash`` is not
    # shadowed for the rest of the module.
    from scrypt import hash as _scrypt_hash

    def scrypt(password, salt, n, r, p):
        """Derive a key from *password* using the ``scrypt`` package.

        Thin wrapper that lets the rest of the script call ``scrypt(...)``
        with ``salt``, ``n``, ``r`` and ``p`` passed by keyword, regardless
        of which implementation was imported.

        Args:
            password: Bytes to hash.
            salt: Salt bytes.
            n: CPU/memory cost factor.
            r: Block size.
            p: Parallelization factor.

        Returns:
            Whatever the package's ``hash`` returns for these arguments
            (the derived key).
        """
        return _scrypt_hash(password, salt, n, r, p)
@ -18,7 +17,7 @@ except ImportError:
MIN_SALT_LENGTH = 16
arg_parser = argparse.ArgumentParser(
description='Apply scrypt to every row of csv file',
description='Apply scrypt to every row of txt file. Salt, N, r and p parameters will be written as a first line',
epilog=(
'Please use hashes only for reconciliation. ' +
'Do not store them more than it is needed for reconciliation. ' +
@ -51,17 +50,18 @@ arg_parser.add_argument(
help='parallelization factor (default: {0})'.format(DEFAULT_SCRYPT_P),
)
arg_parser.add_argument(
'salt',
default=sys.stdin,
type=lambda in_str: in_str.encode('utf-8'),
help='salt for scrypt (minimum: 16 bytes)',
'-s',
'--salt',
default=token_bytes(32),
type=lambda in_str: base64.b64decode(in_str),
help='base64 encoded salt for scrypt (minimum: 16 bytes)',
)
arg_parser.add_argument(
'infile',
nargs='?',
type=argparse.FileType('r'),
default=sys.stdin,
help='Comma Separated Values file (CSV) with header (default: stdin)',
help='text file to be hashed line by line (default: stdin)',
)
arg_parser.add_argument(
'outfile',
@ -81,16 +81,24 @@ if len(args.salt) < MIN_SALT_LENGTH:
raise ValidationException('Salt shall be longer than 16 bytes')
with args.infile:
reader = DictReader(args.infile)
with args.outfile:
for row in reader:
args.outfile.writelines((
scrypt(
json.dumps(row, sort_keys=True).lower().encode('utf-8'),
salt=args.salt,
n=args.n,
r=args.r,
p=args.p,
).hex(),
'\n',
))
# NOTE(review): indentation was lost in this diff view; these statements
# presumably sit inside the `with args.infile:` / `with args.outfile:` blocks.
# Header line: the base64-encoded salt followed by the n, r and p values,
# separated by single spaces and terminated by a newline.
args.outfile.write(
' '.join((
base64.b64encode(args.salt).decode('ascii'),
str(args.n),
str(args.r),
str(args.p),
)) + '\n',
)
# One output line per input line: strip the trailing newline, encode the text
# as UTF-8, hash it with scrypt using the salt/n/r/p from the parsed arguments,
# and write the digest Base85-encoded as ASCII.
for row in args.infile:
args.outfile.write(
base64.b85encode(
scrypt(
# Only '\n' characters at the end of the line are removed; other
# whitespace stays part of the hashed value.
row.rstrip('\n').encode('utf-8'),
salt=args.salt,
n=args.n,
r=args.r,
p=args.p,
),
).decode('ascii') + '\n',
)