Spaces:
Sleeping
Sleeping
{
  "dataset": {
    "name": "George-API/cognitive-data",
    "split": "train",
    "column_mapping": {
      "conversations": "text"
    },
    "processing": {
      "sort_by_id": true,
      "maintain_paper_order": true,
      "max_seq_length": 2048
    }
  },
  "data_formatting": {
    "chat_template": "phi",
    "roles": {
      "system": "System: {content}\n\n",
      "human": "Human: {content}\n\n",
      "assistant": "Assistant: {content}\n\n",
      "user": "Human: {content}\n\n"
    },
    "metadata_handling": {
      "include_paper_id": true,
      "include_chunk_number": true,
      "metadata_format": "Paper ID: {paper_id} | Chunk: {chunk_number}"
    }
  },
  "data_loading": {
    "batch_size": 24,
    "shuffle": false,
    "drop_last": false,
    "num_workers": 4,
    "pin_memory": true,
    "prefetch_factor": 4
  },
  "validation": {
    "log_samples": 3,
    "log_interval": 50,
    "metrics": ["processed", "skipped", "avg_tokens", "unique_papers"]
  }
}