-
Notifications
You must be signed in to change notification settings - Fork 0
/
020_sort_files_usingDT.R
113 lines (86 loc) · 2.11 KB
/
020_sort_files_usingDT.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# TAGS: sprintf, regex, mp3, DT,
#
#
# PURPOSE: Sort mp3 file names, by name, by size, using data.table
# USE CASE: Find duplicate files, histogram by size.
#
# THIS FILE:
# "tests/testthat/test_01_SORT_FILES.R#"
{
# load_all()
library(data.table)
library(kableExtra)
source("./90_find_replace_patterns.R")
}
#### base:: commands REF
# Quick-reference list of base R file/path helpers. The calls sit inside
# `if (FALSE)` so they are parsed but never evaluated; several of them
# require arguments and would fail if actually run.
{
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned,
  # which would silently turn this guard on.
  if (FALSE) {
    file.rename()
    file.create()
    basename()
    dirname()
    list.dirs()
    dir()
    # list.files(<dir>)
  }
}
#### Build `dt`: one row per file, with file name and size (default = ".")
# NOTE(review): `get_file_names()` is not defined in this file; presumably it
# comes from the sourced "./90_find_replace_patterns.R" -- confirm.
{
  dt <- get_file_names()
  # str(dt)
  # dput(dt)
  # dput(dt, control = c(NULL)) ## return as list

  # Row count. The "[1] 4077" note from the original looks like this call's
  # output in an earlier run -- confirm.
  dt[, .N]

  # Peek at the first rows.
  head(dt)
}
# ------------------- STOP HERE---------------------
## sort on size (displays all columns)
# NOTE(review): the `size > 50` thresholds and the "<10 MB" remark suggest
# `size` is in MB -- confirm against get_file_names().
{
  # Viewer on a sorted copy; `dt` itself is not reordered by these two lines.
  dt[order(-size)] |> View() # largest first
  dt[order(size)] |> View() # smallest first

  # setorder() reorders `dt` in place: size descending, ties broken by name.
  setorder(dt, -size, name)
  dt

  ## large files
  dt[, .N] # total rows
  dt[size > 50, .N] # rows with size above 50
  dt[size > 20, .N] # rows with size above 20

  # Native pipe, as used everywhere else in this script. The original used
  # `%>%` here, but this file never attaches magrittr, so that operator was
  # only available if some other attached package happened to provide it.
  dt[size > 50] |> head(10L)

  ## most <10 MB
  hist(dt$size)
}
## sort on name
{
  # Reorder `dt` in place: name ascending, then size descending within a name.
  setorderv(dt, cols = c("name", "size"), order = c(1L, -1L))
  head(dt)
  dt |> View() ## <C-Q>
}
## find files with SAME MB (and > 5MB)
## KNOWN GAP (author's note): a real duplicate needs same NAME and same MB;
## the queries here group on size only, and no > 5MB filter is applied yet.
##
# Number of distinct sizes. Author's run: 3795 < 4707, so many files share
# a size.
uniqueN(dt, by = "size")
### this is it!
# Count rows per size, keep sizes that occur more than once, then list the
# most frequent sizes first. Filtering and ordering are chained as separate
# steps: the original combined them with `&&`, which is a scalar operator
# (an error on vectors since R 4.3, and a single TRUE/FALSE before that), so
# it neither filtered nor sorted.
dt[, .N, by = size][N > 1L][order(-N)]
### working, but not what i want.
# All rows, pre-sorted by size in `i`, then regrouped by size.
View(dt[order(size), .SD, by = "size"])
# Last row of each size group (`.N` is the group's row count).
dt[order(size), .SD[.N], by = "size"]
# Rows per size, counted from the group subset; the count column is
# auto-named V1.
dt[order(size), list(nrow(.SD)), by = "size"]
# Rows per size via the built-in `.N`; the count column is named N.
dt[order(size), .N, by = "size"]
# TODO - document w/ DT
### closer
# Logical vector: TRUE where a whole row repeats an earlier row.
duplicated(dt)
# Logical vector: TRUE where `size` repeats an earlier row's size (the first
# row of each size is FALSE).
duplicated(dt, by = "size")
## return all dup rows
# duplicated() on its own leaves out the first row of every repeated size.
# OR-ing a forward scan with a backward scan (fromLast = TRUE) flags every
# row whose size occurs more than once, first occurrence included.
dt[
  duplicated(dt, by = "size") |
    duplicated(dt, by = "size", fromLast = TRUE)
]
# Name plus group row count, restricted to sizes that occur more than once.
dt[, .(name, .N), by = size][N > 1L] |> head()
dt[, .(name, .N), by = size][N > 1L] |> View()
# Every name, listed under its size.
dt[, .(name), by = size] |> View()
### return int
# Number of distinct values of `size`.
uniqueN(dt, by = "size")
### return dt, unique values of size
# One row per distinct size (the first row seen for each size is kept).
u_dt <- unique(dt, by = "size")
u_dt
### inner join
# merge() defaults to an inner join; both tables are matched on `size`.
join_dt <- merge(u_dt, dt, by = "size")
View(join_dt[order(-size)])
# Rows per size, three equivalent spellings of the grouping column.
dt[, .N, by = "size"]
dt[, .N, by = "size"][order(size)]
dt[, .N, by = .(size)]