251 lines
8.0 KiB
TypeScript
251 lines
8.0 KiB
TypeScript
/**
|
|
* Scientific Research: Research Paper Similarity and Discovery
|
|
*
|
|
* Use Case: Find similar research papers, identify research trends,
|
|
* and discover cross-domain connections.
|
|
*
|
|
* Optimization Priority: QUALITY + CROSS-DOMAIN DISCOVERY
|
|
*/
|
|
|
|
import { UnifiedMetrics } from '../../types';
|
|
|
|
export const RESEARCH_ATTENTION_CONFIG = {
|
|
heads: 12, // High-quality attention (more than 8-head optimal)
|
|
forwardPassTargetMs: 100, // 100ms acceptable (research workflow)
|
|
batchSize: 128, // Large batches for corpus processing
|
|
precision: 'float32' as const, // Full precision for scientific data
|
|
crossDomainDiscovery: true, // Enable cross-field connections
|
|
|
|
// Hierarchical clustering for research topics
|
|
clustering: {
|
|
algorithm: 'hierarchical' as const, // Better for taxonomy
|
|
linkage: 'ward' as const, // Ward's minimum variance
|
|
hierarchicalLevels: 5, // Deep taxonomy (field > subfield > topic > subtopic > specific)
|
|
semanticPurity: 0.90 // High purity for research domains
|
|
},
|
|
|
|
// High-recall for comprehensive literature review
|
|
recallTarget: 0.98, // 98% recall (don't miss relevant papers)
|
|
|
|
// Self-organizing for evolving research landscape
|
|
selfHealing: {
|
|
enabled: true,
|
|
adaptationIntervalMs: 60000, // 1-minute adaptation (not real-time)
|
|
degradationThreshold: 0.03, // 3% tolerance
|
|
taxonomyUpdates: true // Update research taxonomy
|
|
}
|
|
};
|
|
|
|
// Research-specific metrics
|
|
export interface ResearchMetrics extends UnifiedMetrics {
|
|
crossDomainConnections: number; // Cross-field discoveries
|
|
citationAccuracy: number; // Citation network accuracy
|
|
taxonomyQuality: number; // Research taxonomy quality
|
|
noveltyScore: number; // Novel connection discovery rate
|
|
expertAgreement: number; // Agreement with domain experts
|
|
}
|
|
|
|
// Research connection interface
|
|
export interface ResearchConnection {
|
|
paperId: string;
|
|
title: string;
|
|
authors: string[];
|
|
similarity: number;
|
|
domain: string;
|
|
crossDomainConnection?: {
|
|
targetDomain: string;
|
|
connectionType: string;
|
|
noveltyScore: number;
|
|
};
|
|
citations: number;
|
|
}
|
|
|
|
// Example: Research paper discovery with cross-domain connections
|
|
export async function discoverRelatedResearch(
|
|
paperEmbedding: Float32Array, // Paper abstract + citations embeddings
|
|
researchCorpus: any, // HNSWGraph type
|
|
applyAttention: (data: Float32Array, config: any) => Promise<Float32Array>,
|
|
buildResearchTaxonomy: (papers: any[], config: any) => Promise<any>,
|
|
findCrossDomainConnections: (papers: any[], taxonomy: any) => Promise<any[]>,
|
|
findDomain: (paper: any, taxonomy: any) => string,
|
|
includeCrossDomain: boolean = true
|
|
): Promise<ResearchConnection[]> {
|
|
const config = RESEARCH_ATTENTION_CONFIG;
|
|
|
|
// 12-head attention for nuanced understanding
|
|
const enhanced = await applyAttention(paperEmbedding, config);
|
|
|
|
// High-recall search (k=100 for comprehensive review)
|
|
const candidates = await researchCorpus.search(enhanced, 100);
|
|
|
|
// Hierarchical clustering for taxonomy
|
|
const taxonomy = await buildResearchTaxonomy(candidates, config.clustering);
|
|
|
|
// Cross-domain discovery (find connections across fields)
|
|
const crossDomain = includeCrossDomain
|
|
? await findCrossDomainConnections(candidates, taxonomy)
|
|
: [];
|
|
|
|
return candidates.map((p: any) => ({
|
|
paperId: p.id,
|
|
title: p.metadata.title,
|
|
authors: p.metadata.authors,
|
|
similarity: p.score,
|
|
domain: findDomain(p, taxonomy),
|
|
crossDomainConnection: crossDomain.find((c: any) => c.paperId === p.id),
|
|
citations: p.metadata.citations
|
|
}));
|
|
}
|
|
|
|
// Performance targets for research
|
|
export const RESEARCH_PERFORMANCE_TARGETS = {
|
|
recallAt100: 0.98, // 98% recall (comprehensive)
|
|
p95LatencyMs: 200, // 200ms p95 (batch processing)
|
|
crossDomainRate: 0.15, // 15% cross-domain discoveries
|
|
expertAgreement: 0.85, // 85% agreement with experts
|
|
uptimePercent: 99.0 // 99% uptime (2 nines)
|
|
};
|
|
|
|
// Research field-specific configurations
|
|
export const RESEARCH_CONFIG_VARIATIONS = {
|
|
// Computer science (fast-moving field, many papers)
|
|
computerScience: {
|
|
...RESEARCH_ATTENTION_CONFIG,
|
|
heads: 10,
|
|
forwardPassTargetMs: 50, // Faster for large corpus
|
|
batchSize: 256,
|
|
recallTarget: 0.97 // Slightly lower for speed
|
|
},
|
|
|
|
// Medicine (high precision required, slower pace)
|
|
medicine: {
|
|
...RESEARCH_ATTENTION_CONFIG,
|
|
heads: 16, // Maximum quality
|
|
forwardPassTargetMs: 150,
|
|
recallTarget: 0.99, // Highest recall
|
|
precision: 'float32' as const
|
|
},
|
|
|
|
// Physics (mathematical precision, cross-domain links)
|
|
physics: {
|
|
...RESEARCH_ATTENTION_CONFIG,
|
|
heads: 14,
|
|
crossDomainDiscovery: true,
|
|
clustering: {
|
|
...RESEARCH_ATTENTION_CONFIG.clustering,
|
|
hierarchicalLevels: 6 // Deeper for specialized subfields
|
|
}
|
|
},
|
|
|
|
// Social sciences (qualitative, broader connections)
|
|
socialSciences: {
|
|
...RESEARCH_ATTENTION_CONFIG,
|
|
heads: 12,
|
|
crossDomainDiscovery: true,
|
|
clustering: {
|
|
...RESEARCH_ATTENTION_CONFIG.clustering,
|
|
semanticPurity: 0.85 // More flexible clustering
|
|
}
|
|
}
|
|
};
|
|
|
|
// Search mode adaptations
|
|
export function adaptConfigToSearchMode(
|
|
baseConfig: typeof RESEARCH_ATTENTION_CONFIG,
|
|
mode: 'literature-review' | 'novelty-discovery' | 'citation-tracing' | 'interdisciplinary'
|
|
): typeof RESEARCH_ATTENTION_CONFIG {
|
|
switch (mode) {
|
|
case 'literature-review':
|
|
return {
|
|
...baseConfig,
|
|
heads: 12,
|
|
recallTarget: 0.99, // Comprehensive recall
|
|
crossDomainDiscovery: false // Stay within field
|
|
};
|
|
case 'novelty-discovery':
|
|
return {
|
|
...baseConfig,
|
|
heads: 16, // Maximum quality
|
|
crossDomainDiscovery: true,
|
|
clustering: {
|
|
...baseConfig.clustering,
|
|
semanticPurity: 0.80 // More flexible for novel connections
|
|
}
|
|
};
|
|
case 'citation-tracing':
|
|
return {
|
|
...baseConfig,
|
|
heads: 10,
|
|
forwardPassTargetMs: 50,
|
|
recallTarget: 0.95, // Fast, focused
|
|
crossDomainDiscovery: false
|
|
};
|
|
case 'interdisciplinary':
|
|
return {
|
|
...baseConfig,
|
|
heads: 14,
|
|
crossDomainDiscovery: true,
|
|
clustering: {
|
|
...baseConfig.clustering,
|
|
hierarchicalLevels: 6, // Deep taxonomy for connections
|
|
semanticPurity: 0.82 // Flexible for cross-field
|
|
}
|
|
};
|
|
}
|
|
}
|
|
|
|
// Research stage adaptations
|
|
export function adaptConfigToResearchStage(
|
|
baseConfig: typeof RESEARCH_ATTENTION_CONFIG,
|
|
stage: 'initial-exploration' | 'hypothesis-formation' | 'validation' | 'writing'
|
|
): typeof RESEARCH_ATTENTION_CONFIG {
|
|
switch (stage) {
|
|
case 'initial-exploration':
|
|
return {
|
|
...baseConfig,
|
|
heads: 12,
|
|
crossDomainDiscovery: true,
|
|
recallTarget: 0.95 // Broad search
|
|
};
|
|
case 'hypothesis-formation':
|
|
return {
|
|
...baseConfig,
|
|
heads: 14,
|
|
crossDomainDiscovery: true,
|
|
recallTarget: 0.98 // Comprehensive
|
|
};
|
|
case 'validation':
|
|
return {
|
|
...baseConfig,
|
|
heads: 16, // Maximum precision
|
|
recallTarget: 0.99,
|
|
crossDomainDiscovery: false // Focused
|
|
};
|
|
case 'writing':
|
|
return {
|
|
...baseConfig,
|
|
heads: 10,
|
|
forwardPassTargetMs: 50, // Fast lookups
|
|
recallTarget: 0.95
|
|
};
|
|
}
|
|
}
|
|
|
|
// Citation network analysis
|
|
export interface CitationNetworkMetrics {
|
|
networkDensity: number;
|
|
clusteringCoefficient: number;
|
|
averagePathLength: number;
|
|
communityModularity: number;
|
|
}
|
|
|
|
export function analyzeCitationNetwork(papers: ResearchConnection[]): CitationNetworkMetrics {
|
|
// Placeholder implementation
|
|
return {
|
|
networkDensity: 0.15,
|
|
clusteringCoefficient: 0.45,
|
|
averagePathLength: 3.2,
|
|
communityModularity: 0.78
|
|
};
|
|
}
|