Skip to content

Commit

Permalink
remove old SpecialTokens
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffrey committed May 24, 2024
1 parent e23ec2d commit 2da0d3b
Showing 1 changed file with 0 additions and 7 deletions.
7 changes: 0 additions & 7 deletions open_lm/datapreprocess/ray/tokenize_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,12 +351,6 @@ def process_keys(data, tokenizer, seqlen, seed, content_key, do_sample, sources=
fh.close()


# Sentinel token ids used by the tokenization pipeline. Built with the
# functional Enum API; members and values are identical to the class form.
SpecialTokens = Enum(
    "SpecialTokens",
    {"END_OF_TEXT": 0, "PAD": -1, "END_OF_DOCUMENT": -2},
)


def parse_s3_path(s3_path):
"""
Extract the bucket and key from an S3 path.
Expand Down Expand Up @@ -635,7 +629,6 @@ def main(args):
)
num_nodes = len(ray.nodes())

SpecialTokens = enum.Enum
Sources = enum.Enum("Sources", {item["source"]: index for index, item in enumerate(data["sources"])})

input_folders = args.input.split(",")
Expand Down

0 comments on commit 2da0d3b

Please sign in to comment.