Skip to content

Commit

Permalink
Merge pull request #688 from oduwsdl/issue-687
Browse files: browse the repository at this point in the history
  • Loading branch information
machawk1 authored Jul 6, 2020
2 parents 1575b1e + 4268141 commit 51ce587
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 23 deletions.
15 changes: 8 additions & 7 deletions ipwb/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,25 @@ def checkArgs_replay(args):

random.seed()
# Write data to temp file (sub-optimal)
tempFilePath = tempfile.gettempdir() + '/' + ''.join(random.sample(
string.ascii_uppercase + string.digits * 6, 12)) + '.cdxj'
with open(tempFilePath, 'w') as f:
f.write(cdxjIn)
args.index = tempFilePath

tf = tempfile.NamedTemporaryFile(mode='w', suffix='.cdxj')
tf.write(cdxjIn)
args.index = tf.name
tf.close()
suppliedIndexParameter = True

proxy = None
if hasattr(args, 'proxy') and args.proxy is not None:
print('Proxying to ' + args.proxy)
print(f'Proxying to {args.proxy}')
proxy = args.proxy

# TODO: add any other sub-arguments for replay here
if suppliedIndexParameter:
replay.start(cdxjFilePath=args.index, proxy=proxy)
else:
print('ERROR: An index file must be specified if not piping, e.g.,')
print('> ipwb replay /path/to/your/index.cdxj\n')
print(("> ipwb replay "
f"{os.path.join('path', 'to', 'your', 'index.cdxj')}\n"))

args.onError()
sys.exit()
Expand Down
12 changes: 7 additions & 5 deletions ipwb/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
(IPWBREPLAY_HOST, IPWBREPLAY_PORT) = IPWBREPLAY_ADDRESS.split(':')
IPWBREPLAY_PORT = int(IPWBREPLAY_PORT)

INDEX_FILE = 'samples/indexes/salam-home.cdxj'
INDEX_FILE = os.path.join('samples', 'indexes', 'salam-home.cdxj')

log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)
Expand Down Expand Up @@ -219,9 +219,10 @@ def fetch_remote_file(path):

# IPFS Config manipulation from here on out.
def readIPFSConfig():
ipfsConfigPath = expanduser("~") + '/.ipfs/config'
ipfsConfigPath = os.path.join(expanduser("~"), '.ipfs', 'config')
if 'IPFS_PATH' in os.environ:
ipfsConfigPath = os.environ.get('IPFS_PATH') + '/config'
ipfsConfigPath = os.path.join(
os.environ.get('IPFS_PATH'), 'config')

try:
with open(ipfsConfigPath, 'r') as f:
Expand All @@ -234,9 +235,10 @@ def readIPFSConfig():


def writeIPFSConfig(jsonToWrite):
ipfsConfigPath = expanduser("~") + '/.ipfs/config'
ipfsConfigPath = os.path.join(expanduser("~"), '.ipfs', 'config')
if 'IPFS_PATH' in os.environ:
ipfsConfigPath = os.environ.get('IPFS_PATH') + '/config'
ipfsConfigPath = os.path.join(
os.environ.get('IPFS_PATH'), 'config')

with open(ipfsConfigPath, 'w') as f:
f.write(json.dumps(jsonToWrite, indent=4, sort_keys=True))
Expand Down
23 changes: 14 additions & 9 deletions tests/testUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@
from ipwb import __file__ as moduleLocation

from multiprocessing import Process
from pathlib import Path

p = Process()


def createUniqueWARC():
lines = []
warcInFilename = 'frogTest.warc'
warcInPath = os.path.join(os.path.dirname(__file__) +
'/../samples/warcs/' + warcInFilename)
warcInPath = os.path.join(
Path(os.path.dirname(__file__)).parent,
'samples', 'warcs', warcInFilename)

stringToChange = b'abcdefghijklmnopqrstuvwxz'
randomString = getRandomString(len(stringToChange))
Expand All @@ -30,8 +32,10 @@ def createUniqueWARC():

warcOutFilename = warcInFilename.replace('.warc', '_' +
randomString + '.warc')
warcOutPath = os.path.join(os.path.dirname(__file__) +
'/../samples/warcs/' + warcOutFilename)
warcOutPath = os.path.join(
Path(os.path.dirname(__file__)).parent,
'samples', 'warcs', warcOutFilename)

print(warcOutPath)
with open(warcOutPath, 'wb') as warcFile:
warcFile.write(newContent)
Expand All @@ -55,12 +59,13 @@ def countCDXJEntries(cdxjData):

def startReplay(warcFilename):
global p
pathOfWARC = os.path.join(os.path.dirname(__file__) +
'/../samples/warcs/' + warcFilename)
tempFilePath = tempfile.gettempdir() + '/' + ''.join(random.sample(
string.ascii_uppercase + string.digits * 6, 12)) + '.cdxj'
pathOfWARC = os.path.join(
Path(os.path.dirname(__file__)).parent,
'samples', 'warcs', warcFilename)

open(tempFilePath, 'a').close() # Create placeholder file for replay
tf = tempfile.NamedTemporaryFile(mode='a', suffix='.cdxj')
tempFilePath = tf.name
tf.close()

p = Process(target=replay.start, args=[tempFilePath])
p.start()
Expand Down
7 changes: 5 additions & 2 deletions tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from ipwb import indexer

from pathlib import Path


def test_cdxj_warc_responseRecordCount():
newWARCPath = ipwbTest.createUniqueWARC()
Expand All @@ -18,8 +20,9 @@ def test_cdxj_warc_responseRecordCount():
# A response record's content-length causes the payload to truncate
# WARC-Response record for html should still exist in output
def test_warc_ipwbIndexerBrokenWARCRecord():
pathOfBrokenWARC = os.path.join(os.path.dirname(__file__) +
'/../samples/warcs/broken.warc')
pathOfBrokenWARC = os.path.join(
Path(os.path.dirname(__file__)).parent,
'samples', 'warcs', 'broken.warc')
cdxjList = indexer.indexFileAt(pathOfBrokenWARC, quiet=True)
cdxj = '\n'.join(cdxjList)
assert ipwbTest.countCDXJEntries(cdxj) == 1
Expand Down

0 comments on commit 51ce587

Please sign in to comment.