Skip to content

Commit

Permalink
Fixed bug with path column; dv_manifest_gen now handles recursion more like it should
Browse files Browse the repository at this point in the history
  • Loading branch information
plesubc committed Sep 18, 2024
1 parent 694326f commit 54700d2
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 28 deletions.
2 changes: 1 addition & 1 deletion src/dataverse_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
'''
from dataverse_utils.dataverse_utils import *

VERSION = (0,13,0)
VERSION = (0,13,1)
__version__ = '.'.join([str(x) for x in VERSION])
23 changes: 13 additions & 10 deletions src/dataverse_utils/dataverse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,21 @@ def make_tsv(start_dir, in_list=None, def_tag='Data', # pylint: disable=too-many
if kwargs.get('path'):
headers.insert(1, 'path')
outf = io.StringIO(newline='')
tsv_writer = csv.writer(outf, delimiter='\t',
quoting=quotype
)
tsv_writer = csv.DictWriter(outf, delimiter='\t',
quoting=quotype,
fieldnames=headers,
extrasaction='ignore')
if inc_header:
tsv_writer.writerow(headers)
tsv_writer.writeheader()
for row in in_list:
desc = os.path.splitext(os.path.basename(row))[0]
if mime:
mtype = mimetypes.guess_type(row)[0]
tsv_writer.writerow([row, desc, def_tag, mtype])
else:
tsv_writer.writerow([row, desc, def_tag])
#the columns
r = {}
r['file'] = row
r['description'] = os.path.splitext(os.path.basename(row))[0]
r['mimetype'] = mimetypes.guess_type(row)[0]
r['tags'] = def_tag
r['path'] = ''
tsv_writer.writerow(r)
outf.seek(0)
outfile = outf.read()
outf.close()
Expand Down
36 changes: 19 additions & 17 deletions src/dataverse_utils/scripts/dv_manifest_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
'''

import argparse
import glob
import os
import pathlib
#pathlib new for Python 3.5
Expand All @@ -16,7 +17,7 @@

import dataverse_utils as du

VERSION = (0, 5, 0)
VERSION = (0, 5, 1)
__version__ = '.'.join([str(x) for x in VERSION])

def parse() -> argparse.ArgumentParser():
Expand Down Expand Up @@ -100,27 +101,28 @@ def main() -> None:
args.quote = quotype(args.quote)
if args.quote == -1:
parser.error('Invalid quotation type')
f_list = []

if not args.files:
args.files = [str(x) for x in pathlib.Path('./').glob('*')]
if args.show_hidden:
args.files += [str(x) for x in pathlib.Path('./').glob('.*')]
for fil in args.files:
finder = pathlib.Path(fil).expanduser()
if args.recursive and finder.is_dir():
f_list += list(finder.rglob('*'))
f_list = []
for file in args.files:
if not args.recursive:
f_list += glob.glob(file,
include_hidden=args.show_hidden)
else:
f_list += list(finder.parent.glob(finder.name))
#Set comprehension strips out duplicates
#Strip out hidden files and directories
if args.show_hidden:
f_list = {str(x) for x in f_list if x.is_file()}
else:
f_list = {str(x) for x in f_list if x.is_file() and not re.search(r'^\.[Aa9-Zz9]*', str(x))}
f_list += glob.glob(file+'/**', recursive=True,
include_hidden=args.show_hidden)
if not f_list and not args.files:
if not args.recursive:
f_list += glob.glob('./*', include_hidden=args.show_hidden)
else:
f_list += glob.glob('./**', recursive=True,
include_hidden=args.show_hidden)
f_list = [pathlib.Path(_) for _ in f_list]
f_list = [_ for _ in f_list if _.stem != '' and _.exists() and _.is_file()]
if not f_list:
#nothing to do
print('Nothing matching these criteria. No manifest generated')
sys.exit()

if args.filename:
du.dump_tsv(os.getcwd(), filename=args.filename,
in_list=f_list,
Expand Down

0 comments on commit 54700d2

Please sign in to comment.