# Copyright © 2025 Intellisol LLC. All Rights Reserved.
#
# This file is part of the Intellisol Automation System.
#
# This software is a trade secret of Intellisol LLC. It is proprietary and
# confidential information. You may not disclose this software or any part of it
# to any third party, or use it in any way not expressly authorized by the
# accompanying End-User License Agreement (EULA).
#
# UNPUBLISHED. RIGHTS RESERVED.


# data_extraction_system/tools/document_splitter.py
import os
from typing import List

def split_document(document_path: str, chunk_size: int = 2000) -> List[str]:
    """Split a UTF-8 text document into consecutive fixed-size chunks.

    The file is read in text mode, so ``chunk_size`` counts characters, not
    bytes. Chunks do not overlap, and the last one may be shorter than
    ``chunk_size``. Only plain-text files are handled here; a format such as
    PDF would need a dedicated library (e.g. PyPDF2).

    Args:
        document_path: Path of the file to read.
        chunk_size: Maximum number of characters per chunk. Must be >= 1.

    Returns:
        The chunks in document order (an empty list for an empty file). If
        the file cannot be read, a single-element list holding an error
        message string is returned instead of an exception being raised.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # Validate before doing any I/O: a negative step would make range() empty
    # and silently drop the whole document, and a zero step makes range()
    # raise an unhelpful error only after the file has already been read.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    try:
        with open(document_path, "r", encoding="utf-8") as f:
            document = f.read()
    except FileNotFoundError:
        return ["Error: Document not found."]
    except Exception as e:
        # Deliberately broad: callers receive read failures (permissions,
        # decoding, bad path type, ...) as a message rather than an exception.
        return [f"Error reading document: {e}"]

    return [
        document[start:start + chunk_size]
        for start in range(0, len(document), chunk_size)
    ]
