diff --git a/ipwb/__main__.py b/ipwb/__main__.py index a57c22ab..a767c316 100644 --- a/ipwb/__main__.py +++ b/ipwb/__main__.py @@ -43,16 +43,16 @@ def checkArgs_replay(args): random.seed() # Write data to temp file (sub-optimal) - tempFilePath = tempfile.gettempdir() + '/' + ''.join(random.sample( - string.ascii_uppercase + string.digits * 6, 12)) + '.cdxj' - with open(tempFilePath, 'w') as f: - f.write(cdxjIn) - args.index = tempFilePath + + tf = tempfile.NamedTemporaryFile(mode='w', suffix='.cdxj') + tf.write(cdxjIn) + args.index = tf.name + tf.close() suppliedIndexParameter = True proxy = None if hasattr(args, 'proxy') and args.proxy is not None: - print('Proxying to ' + args.proxy) + print(f'Proxying to {args.proxy}') proxy = args.proxy # TODO: add any other sub-arguments for replay here @@ -60,7 +60,8 @@ def checkArgs_replay(args): replay.start(cdxjFilePath=args.index, proxy=proxy) else: print('ERROR: An index file must be specified if not piping, e.g.,') - print('> ipwb replay /path/to/your/index.cdxj\n') + print(("> ipwb replay " + f"{os.path.join('path', 'to', 'your', 'index.cdxj')}\n")) args.onError() sys.exit() diff --git a/ipwb/util.py b/ipwb/util.py index 2bc7451e..63767503 100644 --- a/ipwb/util.py +++ b/ipwb/util.py @@ -36,7 +36,7 @@ (IPWBREPLAY_HOST, IPWBREPLAY_PORT) = IPWBREPLAY_ADDRESS.split(':') IPWBREPLAY_PORT = int(IPWBREPLAY_PORT) -INDEX_FILE = 'samples/indexes/salam-home.cdxj' +INDEX_FILE = os.path.join('samples', 'indexes', 'salam-home.cdxj') log = logging.getLogger('werkzeug') log.setLevel(logging.ERROR) @@ -219,9 +219,10 @@ def fetch_remote_file(path): # IPFS Config manipulation from here on out. def readIPFSConfig(): - ipfsConfigPath = expanduser("~") + '/.ipfs/config' + ipfsConfigPath = os.path.join(expanduser("~"), '.ipfs', 'config') if 'IPFS_PATH' in os.environ: - ipfsConfigPath = os.environ.get('IPFS_PATH') + '/config' + ipfsConfigPath = os.path.join( + os.environ.get('IPFS_PATH'), 'config') try: with open(ipfsConfigPath, 'r') as f: @@ -234,9 +235,10 @@ def readIPFSConfig(): def writeIPFSConfig(jsonToWrite): - ipfsConfigPath = expanduser("~") + '/.ipfs/config' + ipfsConfigPath = os.path.join(expanduser("~"), '.ipfs', 'config') if 'IPFS_PATH' in os.environ: - ipfsConfigPath = os.environ.get('IPFS_PATH') + '/config' + ipfsConfigPath = os.path.join( + os.environ.get('IPFS_PATH'), 'config') with open(ipfsConfigPath, 'w') as f: f.write(json.dumps(jsonToWrite, indent=4, sort_keys=True)) diff --git a/tests/testUtil.py b/tests/testUtil.py index b378848b..96a3b168 100644 --- a/tests/testUtil.py +++ b/tests/testUtil.py @@ -11,6 +11,7 @@ from ipwb import __file__ as moduleLocation from multiprocessing import Process +from pathlib import Path p = Process() @@ -18,8 +19,9 @@ def createUniqueWARC(): lines = [] warcInFilename = 'frogTest.warc' - warcInPath = os.path.join(os.path.dirname(__file__) + - '/../samples/warcs/' + warcInFilename) + warcInPath = os.path.join( + Path(os.path.dirname(__file__)).parent, + 'samples', 'warcs', warcInFilename) stringToChange = b'abcdefghijklmnopqrstuvwxz' randomString = getRandomString(len(stringToChange)) @@ -30,8 +32,10 @@ def createUniqueWARC(): warcOutFilename = warcInFilename.replace('.warc', '_' + randomString + '.warc') - warcOutPath = os.path.join(os.path.dirname(__file__) + - '/../samples/warcs/' + warcOutFilename) + warcOutPath = os.path.join( + Path(os.path.dirname(__file__)).parent, + 'samples', 'warcs', warcOutFilename) + print(warcOutPath) with open(warcOutPath, 'wb') as warcFile: warcFile.write(newContent) @@ -55,12 +59,13 @@ def countCDXJEntries(cdxjData): def startReplay(warcFilename): global p - pathOfWARC = os.path.join(os.path.dirname(__file__) + - '/../samples/warcs/' + warcFilename) - tempFilePath = tempfile.gettempdir() + '/' + ''.join(random.sample( - string.ascii_uppercase + string.digits * 6, 12)) + '.cdxj' + pathOfWARC = os.path.join( + Path(os.path.dirname(__file__)).parent, + 'samples', 'warcs', warcFilename) - open(tempFilePath, 'a').close() # Create placeholder file for replay + tf = tempfile.NamedTemporaryFile(mode='a', suffix='.cdxj') + tempFilePath = tf.name + tf.close() p = Process(target=replay.start, args=[tempFilePath]) p.start() diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 0f8edc4e..c16c6084 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -6,6 +6,8 @@ from ipwb import indexer +from pathlib import Path + def test_cdxj_warc_responseRecordCount(): newWARCPath = ipwbTest.createUniqueWARC() @@ -18,8 +20,9 @@ def test_cdxj_warc_responseRecordCount(): # A response record's content-length causes the payload to truncate # WARC-Response record for html should still exist in output def test_warc_ipwbIndexerBrokenWARCRecord(): - pathOfBrokenWARC = os.path.join(os.path.dirname(__file__) + - '/../samples/warcs/broken.warc') + pathOfBrokenWARC = os.path.join( + Path(os.path.dirname(__file__)).parent, + 'samples', 'warcs', 'broken.warc') cdxjList = indexer.indexFileAt(pathOfBrokenWARC, quiet=True) cdxj = '\n'.join(cdxjList) assert ipwbTest.countCDXJEntries(cdxj) == 1