Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added option to symlink (ln -s) #48

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,10 @@ Occasionally, you may have things that comprise more than a single file (e.g. pi
Set `group_prefix` to the length of the group (e.g. `2`).
But now _all_ files should be part of groups.

Set `move=True` if you want to move the files instead of copying.

Set the `move` argument to choose how files are placed in the output folders:
- `move=True` or `move='move'` if you want to move the files instead of copying.
- `move=False` or `move='copy'` if you want to copy the files (default behavior).
- `move='symlink'` if you want to create symbolic links (i.e. shortcuts, as with `ln -s`) instead of copying.
### CLI

```
Expand All @@ -114,6 +116,7 @@ Options:
--oversample enable oversampling of imbalanced datasets, works only with --fixed.
--group_prefix split files into equally-sized groups based on their prefix
--move move the files instead of copying
--symlink symlink(create shortcut) the files instead of copying
Example:
splitfolders --ratio .8 .1 .1 -- folder_with_images
```
Expand Down
13 changes: 11 additions & 2 deletions splitfolders/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

def run():
parser = argparse.ArgumentParser(
description="Split folders with files (e.g. images) into training, validation and test(dataset) folders."
description="Split folders with files (e.g. images) by copying them into training, validation and test(dataset) folders."
)
parser.add_argument(
"--output",
Expand Down Expand Up @@ -41,18 +41,27 @@ def run():
default=None,
help="split files into equally-sized groups based on their prefix",
)
parser.add_argument(
group = parser.add_mutually_exclusive_group()
group.add_argument(
"--move",
action="store_true",
help="move the files instead of copying",
)
group.add_argument(
"--symlink",
action="store_true",
help="symlink(create shortcut) the files instead of copying",
)
parser.add_argument(
"input",
help="directory with the input data. The directory needs to have the labels as sub-directories. In those sub-directories are then the actual files that gets split.",
)

args = parser.parse_args()

if args.symlink:
args.move = 'symlink'

if args.ratio:
ratio(
args.input, args.output, args.seed, args.ratio, args.group_prefix, args.move
Expand Down
41 changes: 35 additions & 6 deletions splitfolders/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from pathlib import Path
import random
import shutil
from os import path
from os import path, symlink

from .utils import list_dirs, list_files

Expand Down Expand Up @@ -150,8 +150,22 @@ def fixed(
if use_tqdm:
iteration = tqdm(iteration, desc="Oversampling", unit=" classes")

copy_fun = shutil.move if move else shutil.copy2
if move == 'move' or move is True:
copy_fun = shutil.move
elif move == 'copy' or move is False:
copy_fun = shutil.copy2
else:
copy_fun = symlink

def copyer(f_orig, f_dest):
if isinstance(move, bool) or move == 'move' or move == 'copy':
copy_fun(str(f_orig), str(f_dest))
else:
try:
copy_fun(f_orig.resolve(), f_dest.resolve())
except FileExistsError:
pass

for num_items, class_dir in iteration:
class_name = path.split(class_dir)[1]
full_path = path.join(output, "train", class_name)
Expand All @@ -169,7 +183,7 @@ def fixed(
for f_orig in f_chosen:
new_name = f_orig.stem + "_" + str(i) + f_orig.suffix
f_dest = f_orig.with_name(new_name)
copy_fun(str(f_orig), str(f_dest))
copyer(f_orig, f_dest)


def group_by_prefix(files, len_pairs):
Expand Down Expand Up @@ -296,8 +310,23 @@ def copy_files(files_type, class_dir, output, prog_bar, move):
Copies the files from the input folder to the output folder
"""

copy_fun = shutil.move if move else shutil.copy2
if move == 'move' or move is True:
copy_fun = shutil.move
elif move == 'copy' or move is False:
copy_fun = shutil.copy2
else:
copy_fun = symlink

def copyer(base_file, full_path):
if isinstance(move, bool) or move == 'move' or move == 'copy':
copy_fun(str(base_file), str(full_path))
else:
try:
copy_fun(base_file.resolve(), path.join(full_path,
path.split(Path(base_file))[1]))
except FileExistsError:
pass

# get the last part within the file
class_name = path.split(class_dir)[1]
for (files, folder_type) in files_type:
Expand All @@ -309,6 +338,6 @@ def copy_files(files_type, class_dir, output, prog_bar, move):
prog_bar.update()
if type(f) == tuple:
for x in f:
copy_fun(str(x), str(full_path))
copyer(x, full_path)
else:
copy_fun(str(f), str(full_path))
copyer(f, full_path)