执行官方微调demo遇到的问题 #1096
执行官方微调demo遇到的问题
#1096
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
执行命令 `python finetune_hf.py ../AdvertiseGen/ /data/cxk_home/ChatGLM3/chatglm3-6b configs/lora.yaml` 后出现下面的报错，看不出是什么原因引起的。
`Map (num_proc=16): 0%| | 0/114599 [00:00<?, ? examples/s]
RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 623, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3482, in _map_single
batch = apply_function_on_filtered_inputs(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3361, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/cxk_home/ChatGLM3/finetune_demo/finetune_hf.py", line 284, in process_batch
batched_conv = batch['conversations']
~~~~~^^^^^^^^^^^^^^^^^
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/formatting/formatting.py", line 270, in __getitem__
value = self.data[key]
~~~~~~~~~^^^^^
KeyError: 'conversations'
"""
The above exception was the direct cause of the following exception:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data/cxk_home/ChatGLM3/finetune_demo/finetune_hf.py:464 in main │
│ │
│ 461 │ tokenizer, model = load_tokenizer_and_model(model_dir, peft_config=ft_config.peft_co │
│ 462 │ data_manager = DataManager(data_dir, ft_config.data_config) │
│ 463 │ │
│ ❱ 464 │ train_dataset = data_manager.get_dataset( │
│ 465 │ │ Split.TRAIN, │
│ 466 │ │ functools.partial( │
│ 467 │ │ │ process_batch, │
│ │
│ /data/cxk_home/ChatGLM3/finetune_demo/finetune_hf.py:263 in get_dataset │
│ │
│ 260 │ │ │ remove_columns = orig_dataset.column_names │
│ 261 │ │ else: │
│ 262 │ │ │ remove_columns = None │
│ ❱ 263 │ │ return orig_dataset.map( │
│ 264 │ │ │ process_fn, │
│ 265 │ │ │ batched=batched, │
│ 266 │ │ │ remove_columns=remove_columns, │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py: │
│ 593 in wrapper │
│ │
│ 590 │ │ else: │
│ 591 │ │ │ self: "Dataset" = kwargs.pop("self") │
│ 592 │ │ # apply actual function │
│ ❱ 593 │ │ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) │
│ 594 │ │ datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [ou │
│ 595 │ │ for dataset in datasets: │
│ 596 │ │ │ # Remove task templates if a column mapping of the template is no longer val │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py: │
│ 558 in wrapper │
│ │
│ 555 │ │ │ "output_all_columns": self._output_all_columns, │
│ 556 │ │ } │
│ 557 │ │ # apply actual function │
│ ❱ 558 │ │ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) │
│ 559 │ │ datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [ou │
│ 560 │ │ # re-apply format to the output │
│ 561 │ │ for dataset in datasets: │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py: │
│ 3197 in map │
│ │
│ 3194 │ │ │ │ │ │ total=pbar_total, │
│ 3195 │ │ │ │ │ │ desc=(desc or "Map") + f" (num_proc={num_proc})", │
│ 3196 │ │ │ │ │ ) as pbar: │
│ ❱ 3197 │ │ │ │ │ │ for rank, done, content in iflatmap_unordered( │
│ 3198 │ │ │ │ │ │ │ pool, Dataset._map_single, kwargs_iterable=kwargs_per_job │
│ 3199 │ │ │ │ │ │ ): │
│ 3200 │ │ │ │ │ │ │ if done: │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/utils/py_utils.py │
│ :663 in iflatmap_unordered │
│ │
│ 660 │ │ finally: │
│ 661 │ │ │ if not pool_changed: │
│ 662 │ │ │ │ # we get the result in case there's an error to raise │
│ ❱ 663 │ │ │ │ [async_result.get(timeout=0.05) for async_result in async_results] │
│ 664 │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/utils/py_utils.py │
│ :663 in <listcomp> │
│ │
│ 660 │ │ finally: │
│ 661 │ │ │ if not pool_changed: │
│ 662 │ │ │ │ # we get the result in case there's an error to raise │
│ ❱ 663 │ │ │ │ [async_result.get(timeout=0.05) for async_result in async_results] │
│ 664 │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/multiprocess/pool.py:774 │
│ in get │
│ │
│ 771 │ │ if self._success: │
│ 772 │ │ │ return self._value │
│ 773 │ │ else: │
│ ❱ 774 │ │ │ raise self._value │
│ 775 │ │
│ 776 │ def _set(self, i, obj): │
│ 777 │ │ self._success, self._value = obj │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
KeyError: 'conversations'
`
Beta Was this translation helpful? Give feedback.
All reactions