/**
 * Document Chunking Utilities
 *
 * Features:
 * - Configurable chunk size and overlap
 * - Sentence-aware splitting
 * - Paragraph-aware splitting
 * - Token-based chunking (approximate)
 * - Metadata tracking for reconstruction
 *
 * This is a declaration file: it describes the public types and function
 * signatures only; the implementations live in the compiled module.
 */

/**
 * Chunking configuration.
 *
 * Every option is optional; the defaults quoted below are the ones stated
 * by this declaration's documentation.
 */
export interface ChunkingConfig {
  /** Maximum chunk size in characters (default: 512) */
  maxChunkSize?: number;
  /** Overlap between chunks in characters (default: 50) */
  overlap?: number;
  /** Strategy for splitting (default: 'sentence') */
  strategy?: 'character' | 'sentence' | 'paragraph' | 'token';
  /** Minimum chunk size (default: 100) */
  minChunkSize?: number;
  /** Include metadata with chunks */
  includeMetadata?: boolean;
}

/**
 * Chunk result with metadata.
 */
export interface Chunk {
  /** Chunk text content */
  text: string;
  /** Original index in document */
  index: number;
  /** Start position in original text */
  startPos: number;
  /**
   * End position in original text.
   * NOTE(review): whether this bound is inclusive or exclusive is not
   * visible from the declaration; confirm against the implementation.
   */
  endPos: number;
  /** Character count */
  length: number;
  /** Approximate token count (chars / 4) */
  tokenCount: number;
}

/**
 * Chunked document result.
 */
export interface ChunkedDocument {
  /** Array of chunks */
  chunks: Chunk[];
  /** Original text length */
  originalLength: number;
  /** Total chunks created */
  totalChunks: number;
  /**
   * Configuration used.
   *
   * Typed as `Required<ChunkingConfig>` so every option is present on the
   * result (presumably with defaults filled in; confirm against the
   * implementation). A bare `Required` does not compile because the
   * utility type needs its type argument.
   */
  config: Required<ChunkingConfig>;
}

/**
 * Split text into chunks with overlap.
 *
 * @param text - The text to split.
 * @param config - Optional chunking options; see {@link ChunkingConfig}.
 * @returns The chunks together with the original length, the chunk count
 *   and the configuration used.
 */
export declare function chunkText(text: string, config?: ChunkingConfig): ChunkedDocument;

/**
 * Estimate token count for text.
 *
 * @param text - The text to measure.
 * @returns The estimated number of tokens.
 */
export declare function estimateTokens(text: string): number;

/**
 * Reconstruct original text from chunks (approximate).
 *
 * @param chunks - The chunks to reassemble.
 * @returns The reconstructed text.
 */
export declare function reconstructFromChunks(chunks: Chunk[]): string;
//# sourceMappingURL=chunking.d.ts.map