Parser1[Streaming JSON Parser] PartialXML --> Parser2[Custom XML Parser] Progress --> Aggregator[Progress Aggregator] end subgraph "Data Challenges" LargeObjects[Large Objects] Circular[Circular Refs] TypedData[Special Types] LargeObjects --> Normalizer[normalizeToSize] Circular --> Normalizer TypedData --> Normalizer end subgraph "LLM Challenges" Errors[Tool Errors] Context[Dynamic Context] Synthesis[Multi-Result] Errors --> Formatter[Error Formatter] Context --> Assembler[Context Assembly] Synthesis --> Synthesizer[AgentTool Synthesis] end "> Parser1[Streaming JSON Parser] PartialXML --> Parser2[Custom XML Parser] Progress --> Aggregator[Progress Aggregator] end subgraph "Data Challenges" LargeObjects[Large Objects] Circular[Circular Refs] TypedData[Special Types] LargeObjects --> Normalizer[normalizeToSize] Circular --> Normalizer TypedData --> Normalizer end subgraph "LLM Challenges" Errors[Tool Errors] Context[Dynamic Context] Synthesis[Multi-Result] Errors --> Formatter[Error Formatter] Context --> Assembler[Context Assembly] Synthesis --> Synthesizer[AgentTool Synthesis] end "> Parser1[Streaming JSON Parser] PartialXML --> Parser2[Custom XML Parser] Progress --> Aggregator[Progress Aggregator] end subgraph "Data Challenges" LargeObjects[Large Objects] Circular[Circular Refs] TypedData[Special Types] LargeObjects --> Normalizer[normalizeToSize] Circular --> Normalizer TypedData --> Normalizer end subgraph "LLM Challenges" Errors[Tool Errors] Context[Dynamic Context] Synthesis[Multi-Result] Errors --> Formatter[Error Formatter] Context --> Assembler[Context Assembly] Synthesis --> Synthesizer[AgentTool Synthesis] end ">
graph TB
subgraph "Streaming Challenges"
PartialJSON[Partial JSON Stream]
PartialXML[Partial XML Stream]
Progress[Concurrent Progress]
PartialJSON --> Parser1[Streaming JSON Parser]
PartialXML --> Parser2[Custom XML Parser]
Progress --> Aggregator[Progress Aggregator]
end
subgraph "Data Challenges"
LargeObjects[Large Objects]
Circular[Circular Refs]
TypedData[Special Types]
LargeObjects --> Normalizer[normalizeToSize]
Circular --> Normalizer
TypedData --> Normalizer
end
subgraph "LLM Challenges"
Errors[Tool Errors]
Context[Dynamic Context]
Synthesis[Multi-Result]
Errors --> Formatter[Error Formatter]
Context --> Assembler[Context Assembly]
Synthesis --> Synthesizer[AgentTool Synthesis]
end
When an LLM streams a tool use request, it doesn't send complete JSON all at once. Instead, you might receive fragments like:
{"file_path": "/src/
{"file_path": "/src/main.
{"file_path": "/src/main.ts", "old_str
{"file_path": "/src/main.ts", "old_string": "console.log('hell
The streaming JSON parser solves this elegantly:
class StreamingToolInputParser {
private buffer: string = '';
private state = {
depth: 0, // Nesting level of {}/[]
inString: boolean, // Currently inside a string?
escape: boolean, // Previous char was backslash?
stringChar: '"' | "'" | null, // Which quote started current string
};
addChunk(chunk: string): ParseResult {
this.buffer += chunk;
// Update parser state character by character
for (let i = 0; i < chunk.length; i++) {
const char = chunk[i];
const prevChar = i > 0 ? chunk[i-1] : this.buffer[this.buffer.length - chunk.length - 1];
// Handle escape sequences
if (this.escape) {
this.escape = false;
continue;
}
if (char === '\\\\\\\\' && this.state.inString) {
this.escape = true;
continue;
}
// String boundary detection
if (!this.state.inString && (char === '"' || char === "'")) {
this.state.inString = true;
this.state.stringChar = char;
} else if (this.state.inString && char === this.state.stringChar) {
this.state.inString = false;
this.state.stringChar = null;
}
// Track nesting depth outside strings
if (!this.state.inString) {
if (char === '{' || char === '[') {
this.state.depth++;
} else if (char === '}' || char === ']') {
this.state.depth--;
// Attempt parse when we return to depth 0
if (this.state.depth === 0) {
return this.tryParse();
}
}
}
}
// Might be complete even without depth 0 (malformed JSON)
if (this.buffer.length > 10000) { // Safety limit
return this.tryParseWithRecovery();
}
return { complete: false };
}
private tryParse(): ParseResult {
try {
const parsed = JSON.parse(this.buffer);
return { complete: true, value: parsed };
} catch (e) {
return { complete: false, partial: this.buffer };
}
}
private tryParseWithRecovery(): ParseResult {
let attemptBuffer = this.buffer;
// Recovery strategy 1: Close unclosed strings
if (this.state.inString && this.state.stringChar) {
attemptBuffer += this.state.stringChar;
// Try to close any unclosed structures
attemptBuffer += '}'.repeat(Math.max(0, this.state.depth));
attemptBuffer += ']'.repeat(
Math.max(0, (attemptBuffer.match(/\\\\[/g) || []).length -
(attemptBuffer.match(/\\\\]/g) || []).length)
);
}
// Recovery strategy 2: Auto-close based on structure analysis
const braceBalance = (attemptBuffer.match(/{/g) || []).length -
(attemptBuffer.match(/}/g) || []).length;
const bracketBalance = (attemptBuffer.match(/\\\\[/g) || []).length -
(attemptBuffer.match(/\\\\]/g) || []).length;
attemptBuffer += '}'.repeat(Math.max(0, braceBalance));
attemptBuffer += ']'.repeat(Math.max(0, bracketBalance));
try {
const parsed = JSON.parse(attemptBuffer);
return {
complete: true,
value: parsed,
wasRepaired: true,
repairs: {
closedStrings: this.state.inString,
addedBraces: braceBalance,
addedBrackets: bracketBalance
}
};
} catch (e) {
// Recovery strategy 3: Extract what we can
const partialResult = this.extractPartialData(this.buffer);
return {
complete: false,
partial: partialResult,
error: e.message
};
}
}
private extractPartialData(buffer: string): any {
// Try to extract complete key-value pairs
const result: any = {};
const keyValuePattern = /"(\\\\w+)":\\\\s*("([^"\\\\\\\\]*(\\\\\\\\.[^"\\\\\\\\]*)*)"|true|false|null|\\\\d+)/g;
let match;
while ((match = keyValuePattern.exec(buffer)) !== null) {
const [, key, value] = match;
try {
result[key] = JSON.parse(value);
} catch {
result[key] = value; // Store as string if parse fails
}
}
return Object.keys(result).length > 0 ? result : null;
}
}
Why This Is Novel:
Performance Characteristics:
Input Size | Parse Time | Memory | Success Rate |
---|---|---|---|
<1KB | <0.1ms | O(n) | 100% |
1-10KB | 0.1-1ms | O(n) | 99.9% |
10-100KB | 1-10ms | O(n) | 99.5% |
>100KB | 10-50ms | O(n) | 98% (with recovery) |
normalizeToSize
Algorithm: Smart Data TruncationWhen sending data to LLMs or telemetry services, size limits are critical. The normalizeToSize
algorithm intelligently reduces object size while preserving structure:
class DataNormalizer {
static normalizeToSize(
obj: any,
maxDepth: number = 3,
maxSizeInBytes: number = 100_000
): any {
// First attempt at full depth
let normalized = this.normalize(obj, maxDepth);
let size = this.estimateSize(normalized);
// Iteratively reduce depth until size fits
while (size > maxSizeInBytes && maxDepth > 0) {
maxDepth--;
normalized = this.normalize(obj, maxDepth);
size = this.estimateSize(normalized);
}
return normalized;
}
private static normalize(
obj: any,
maxDepth: number,
currentDepth: number = 0,
visited = new WeakSet()
): any {
// Handle primitives
if (obj === null) return '[null]';
if (obj === undefined) return '[undefined]';
if (typeof obj === 'number' && isNaN(obj)) return '[NaN]';
if (typeof obj === 'bigint') return `[BigInt: ${obj}n]`;
// Handle functions and symbols
if (typeof obj === 'function') {
return `[Function: ${obj.name || 'anonymous'}]`;
}
if (typeof obj === 'symbol') {
return `[Symbol: ${obj.description || 'Symbol'}]`;
}
// Primitives pass through
if (['string', 'number', 'boolean'].includes(typeof obj)) {
return obj;
}
// Depth limit reached
if (currentDepth >= maxDepth) {
if (Array.isArray(obj)) return `[Array(${obj.length})]`;
if (obj.constructor?.name) {
return `[${obj.constructor.name}]`;
}
return '[Object]';
}
// Circular reference detection
if (visited.has(obj)) {
return '[Circular]';
}
visited.add(obj);
// Special handling for known types
if (this.isReactElement(obj)) {
return `[React.${obj.type?.name || obj.type || 'Element'}]`;
}
if (this.isVueComponent(obj)) {
return `[Vue.${obj.$options?.name || 'Component'}]`;
}
if (obj instanceof Error) {
return {
name: obj.name,
message: obj.message,
stack: this.truncateStack(obj.stack)
};
}
if (obj instanceof Date) {
return obj.toISOString();
}
if (obj instanceof RegExp) {
return obj.toString();
}
// Handle DOM elements
if (this.isDOMElement(obj)) {
return `[${obj.tagName}${obj.id ? '#' + obj.id : ''}]`;
}
// Handle toJSON method
if (typeof obj.toJSON === 'function') {
try {
return this.normalize(
obj.toJSON(),
maxDepth,
currentDepth,
visited
);
} catch {
return '[Object with toJSON error]';
}
}
// Arrays
if (Array.isArray(obj)) {
const result = [];
const maxItems = 100; // Limit array size
for (let i = 0; i < Math.min(obj.length, maxItems); i++) {
result.push(
this.normalize(obj[i], maxDepth, currentDepth + 1, visited)
);
}
if (obj.length > maxItems) {
result.push(`... ${obj.length - maxItems} more items`);
}
return result;
}
// Objects
const result: any = {};
const keys = Object.keys(obj);
const maxProps = 50; // Limit object properties
// Respect Sentry directives
if (obj.__sentry_skip_normalization__) {
return obj;
}
const effectiveMaxDepth =
obj.__sentry_override_normalization_depth__ || maxDepth;
for (let i = 0; i < Math.min(keys.length, maxProps); i++) {
const key = keys[i];
try {
result[key] = this.normalize(
obj[key],
effectiveMaxDepth,
currentDepth + 1,
visited
);
} catch {
result[key] = '[Error accessing property]';
}
}
if (keys.length > maxProps) {
result['...'] = `${keys.length - maxProps} more properties`;
}
return result;
}
private static estimateSize(obj: any): number {
// Fast estimation without full serialization
const sample = JSON.stringify(obj).substring(0, 1000);
const avgCharSize = new Blob([sample]).size / sample.length;
const fullLength = this.estimateJsonLength(obj);
return Math.ceil(fullLength * avgCharSize);
}
private static estimateJsonLength(obj: any, visited = new WeakSet()): number {
if (obj === null || obj === undefined) return 4; // "null"
if (typeof obj === 'boolean') return obj ? 4 : 5; // "true" : "false"
if (typeof obj === 'number') return String(obj).length;
if (typeof obj === 'string') return obj.length + 2; // quotes
if (visited.has(obj)) return 12; // "[Circular]"
visited.add(obj);
if (Array.isArray(obj)) {
let length = 2; // []
for (const item of obj) {
length += this.estimateJsonLength(item, visited) + 1; // comma
}
return length;
}
if (typeof obj === 'object') {
let length = 2; // {}
for (const key in obj) {
length += key.length + 3; // "key":
length += this.estimateJsonLength(obj[key], visited) + 1; // comma
}
return length;
}
return 10; // Default estimate
}
}
Why This Is Novel: