tasq/node_modules/@claude-flow/embeddings/dist/chunking.d.ts

68 lines
1.8 KiB
TypeScript

/**
* Document Chunking Utilities
*
* Features:
* - Configurable chunk size and overlap
* - Sentence-aware splitting
* - Paragraph-aware splitting
* - Token-based chunking (approximate)
* - Metadata tracking for reconstruction
*/
/**
* Chunking configuration
*
* Options bag accepted by `chunkText` as its optional second argument.
* Every field is optional. The defaults quoted on each field are the
* documented ones; a declaration file cannot enforce them, so they are
* applied (if at all) by the implementation.
*/
export interface ChunkingConfig {
/** Maximum chunk size in characters (default: 512) */
maxChunkSize?: number;
/** Overlap between chunks in characters (default: 50) */
overlap?: number;
/**
* Strategy for splitting (default: 'sentence').
* Must be one of 'character', 'sentence', 'paragraph' or 'token'.
*/
strategy?: 'character' | 'sentence' | 'paragraph' | 'token';
/**
* Minimum chunk size (default: 100).
* NOTE(review): the unit is presumably characters, matching `maxChunkSize` — confirm.
*/
minChunkSize?: number;
/**
* Include metadata with chunks.
* NOTE(review): no default is documented for this flag — confirm against the implementation.
*/
includeMetadata?: boolean;
}
/**
* Chunk result with metadata
*
* Element type of `ChunkedDocument.chunks` and of the array taken by
* `reconstructFromChunks`. All fields are required.
*/
export interface Chunk {
/** Chunk text content */
text: string;
/**
* Original index in document.
* NOTE(review): presumably the chunk's ordinal position among the produced
* chunks; whether it is 0- or 1-based is not stated here — confirm.
*/
index: number;
/** Start position in original text */
startPos: number;
/**
* End position in original text.
* NOTE(review): whether this bound is inclusive or exclusive is not stated
* here — confirm before slicing with it.
*/
endPos: number;
/** Character count */
length: number;
/** Approximate token count (chars / 4) */
tokenCount: number;
}
/**
* Chunked document result
*
* Return type of `chunkText`: the produced chunks plus summary
* information about the run.
*/
export interface ChunkedDocument {
/** Array of chunks */
chunks: Chunk[];
/** Original text length */
originalLength: number;
/**
* Total chunks created.
* NOTE(review): presumably equal to `chunks.length` — confirm.
*/
totalChunks: number;
/**
* Configuration used.
* Typed as `Required<ChunkingConfig>`, so every option is present here
* even though all of them are optional on the input type.
*/
config: Required<ChunkingConfig>;
}
/**
* Split text into chunks with overlap
*
* @param text - The text to split.
* @param config - Optional chunking options; may be omitted entirely.
* @returns A `ChunkedDocument` holding the chunks, the original text
* length, the chunk count and the fully-populated configuration.
*/
export declare function chunkText(text: string, config?: ChunkingConfig): ChunkedDocument;
/**
* Estimate token count for text
*
* NOTE(review): `Chunk.tokenCount` is documented as "chars / 4"; this
* function presumably applies the same heuristic — confirm, including
* how the result is rounded.
*
* @param text - The text to measure.
* @returns The estimated number of tokens.
*/
export declare function estimateTokens(text: string): number;
/**
* Reconstruct original text from chunks (approximate)
*
* NOTE(review): the result is documented as approximate, presumably
* because overlapping regions between adjacent chunks have to be
* reconciled — confirm what can differ from the original text.
*
* @param chunks - The chunks to join back together.
* @returns The reconstructed text.
*/
export declare function reconstructFromChunks(chunks: Chunk[]): string;
//# sourceMappingURL=chunking.d.ts.map