Skip to content

Commit

Permalink
remove old SpecialTokens
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffrey committed May 24, 2024
1 parent e23ec2d commit 2da0d3b
Showing 1 changed file with 0 additions and 7 deletions.
7 changes: 0 additions & 7 deletions open_lm/datapreprocess/ray/tokenize_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,12 +351,6 @@ def process_keys(data, tokenizer, seqlen, seed, content_key, do_sample, sources=
fh.close()


# Sentinel token ids used by the tokenization pipeline. Built with the
# functional Enum API; members and values are identical to the class form.
SpecialTokens = Enum(
    "SpecialTokens",
    {"END_OF_TEXT": 0, "PAD": -1, "END_OF_DOCUMENT": -2},
)


def parse_s3_path(s3_path):
"""
Extract the bucket and key from an S3 path.
Expand Down Expand Up @@ -635,7 +629,6 @@ def main(args):
)
num_nodes = len(ray.nodes())

SpecialTokens = enum.Enum
Sources = enum.Enum("Sources", {item["source"]: index for index, item in enumerate(data["sources"])})

input_folders = args.input.split(",")
Expand Down

0 comments on commit 2da0d3b

Please sign in to comment.