package com.controul.utils.string { import flash.text.TextField; import flash.text.TextFormat; import flash.utils.ByteArray; //BENCH*/ import flash.utils.getTimer; /** * A diff/patch solution based on random sampling. * @author Hristo Dachev http://blog.controul.com */ public class Patch { ///// ///// ///// ///// Private. private static const intraword : RegExp = /[\s!?.,:;'"`‘’“”\-()[\]{}]/; private static const chunkPattern : RegExp = /^@@ -(\d+),(\d+) \+(\d+),(\d+) @@((?:[\n\r][ +\-][^\n\r]*)+)/gm; private static const editPattern : RegExp = /^([ +\-])(.*)$/gm; private static const bitap : Bitap = new Bitap; /** * Returns a list of common substrings for the two texts. * @param originalString The original string. * @param modifiedString The modified string. * @param semanticTrim Whether to align changes to word-ends or not. * @return An Array of unsigned integers in the format [ ..., index in original text, index in modified text, common chunk length, ... ] */ private function map ( originalString : String, modifiedString : String, semanticTrim : Boolean = false ) : Array { //BENCH*/ var time : uint = getTimer (); var originalLen : uint = originalString.length, modifiedLen : uint = modifiedString.length, shorterLen : uint = originalLen > modifiedLen ? modifiedLen : originalLen, minWord : int = Math.log ( shorterLen ) - 3; if ( minWord < 2 ) minWord = 2; //DEBUG*/ trace ( this + ' @ Patch.map () : Commonalities being mapped over', originalLen, modifiedLen, shorterLen ); // // // Triviality checks. // // String hasn't changed. if ( originalLen == modifiedLen && originalString == modifiedString ) //DEBUG*/ { //DEBUG*/ trace ( ' The two strings are identical.' ); return [ 0, 0, modifiedLen ]; //DEBUG*/ } // // Strings are too short. if ( shorterLen <= minWord ) //DEBUG*/ { //DEBUG*/ trace ( ' At least one of the strings is too short to be compared, will record it as a plain edit.' ); return []; //DEBUG*/ } // // // Scan for common substrings. var a : uint = 0, b : uint = 0, c : uint = 0; var chunks : Array = [], // table : Array = [], // iTable : int = 0, numChunks : int = 0; var shortString : String, longString : String, searchScope : uint = shorterLen - minWord, l : Number = Math.log ( searchScope ), s : int = 0, hit : uint, fails : int = 0, threshold : int = l * l + 0.99, xShort : uint, xShortEnd : uint, xLong : uint, xLen : uint, i : int, n : int, flag : Boolean, overlapLeft : uint, overlapRight : uint; //BENCH*/ var t : int = getTimer (); //DEBUG*/ trace ( 'minWord = ' + minWord + ', minWord = ' + minWord ); if ( originalLen < modifiedLen ) { shortString = originalString; longString = modifiedString; } else { shortString = modifiedString; longString = originalString; } while ( fails < threshold ) { if ( s == 0 ) { hit = 0; s = -1; } else if ( s == -1 ) { hit = searchScope; s = Math.random () * 2147483646 + 1; } else { /* if ( iTable == 0 ) { // Directed sampling. table.length = 0; l = searchScope / ( threshold + 1 ); for ( i = 0; i < threshold; i ++ ) { table [ i ] = l * ( i + 1 ); } for ( i = threshold - 1; i > 0; i -- ) { s = ( s * 16807 ) % 2147483647; a = s / 2147483647 * i; b = table [ a ]; table [ a ] = table [ i ]; table [ i ] = b; } } hit = table [ iTable ++ ]; /*/ s = ( s * 16807 ) % 2147483647; hit = searchScope * s / 2147483647 + 0.5; //*/ } //DEBUG*/ trace ( fails + ': searchScope = ' + searchScope + ', hit (unmapped) = ' + hit); // Map to an uncovered search location. // Suggest initial cursor positions for expansion of the substring. var startLeftSearch : uint = 0, startRightSearch : uint = shorterLen, maxLeftSearch : uint = 0, maxRightSearch : uint = shorterLen; for ( i = 0; i < numChunks; i += 3 ) { xShort = chunks [ i ]; xLen = chunks [ int ( i + 2 ) ]; if ( hit >= xShort ) { hit += xLen; if ( i && chunks [ int ( i - 3 ) ] + chunks [ int ( i - 1 ) ] > xShort ) hit -= chunks [ int ( i - 3 ) ] + chunks [ int ( i - 1 ) ] - xShort; if ( xShort > maxLeftSearch ) maxLeftSearch = xShort; if ( xShort + xLen > startLeftSearch ) startLeftSearch = xShort + xLen; } else { if ( xShort < startRightSearch ) startRightSearch = xShort; if ( xShort + xLen < maxRightSearch ) maxRightSearch = xShort + xLen; else break; } } //DEBUG*/ trace ( ' hit (mapped) = ' + hit + ', minWord = "' + shortString.substr ( hit, minWord ) + '" ...' ); xLong = longString.indexOf ( shortString.substr ( hit, minWord ) ) + 1; if ( xLong ) { // A common substring is found. //DEBUG*/ trace ( ' Found:' ); xShort = hit; xShortEnd = hit + minWord; xLong --; // Expand to the left. a = maxLeftSearch; b = hit; c = ( a + b ) / 2; if ( c < startLeftSearch || startLeftSearch == 0 ) c = startLeftSearch; //DEBUG*/ var cc : int = 0; while ( b - a > 1 ) { //DEBUG*/ cc ++; //DEBUG*/ if ( cc > 50 ) //DEBUG*/ throw new Error ( 'Common substring expansion to the left times-out.' ); hit = longString.indexOf ( shortString.substring ( c, xShortEnd ), xLong + c - xShort ) + 1; if ( hit > 0 ) { b = c; xLong = hit - 1; xShort = c; } else a = c; c = ( a + b ) / 2; // + 0, bias the search to the left } xShort = b; xLen = xShortEnd - xShort; //DEBUG*/ trace ( ' expansion to the left: ' + cc + ' iterations.' ); // Expand to the right. a = xShort + xLen; b = maxRightSearch; c = ( a + b ) / 2 + 0.5; if ( c > startRightSearch || startRightSearch == shorterLen ) c = startRightSearch; //DEBUG*/ cc = 0; while ( b - a > 1 ) { //DEBUG*/ cc ++; //DEBUG*/ if ( cc > 50 ) //DEBUG*/ throw new Error ( 'Common substring expansion to the right times-out.' ); hit = longString.indexOf ( shortString.substring ( xShort, c ), xLong ) + 1; if ( hit > 0 ) { xLong = hit - 1; a = c; } else b = c; c = ( a + b ) / 2 + 0.5; // + 0.5, bias the search to the right } //DEBUG*/ trace ( ' expansion to the right: ' + cc + ' iterations.' ); // Discard if shorter than a given length. xLen = a - xShort; // if ( xLen < minWord ) // { // //DEBUG*/ trace ( ' Discarding chunk', xShort, xLong, xLen ); // //DEBUG*/ trace ( ' "' + shortString.substr ( xShort, xLen ) + '"' ); // fails ++; // continue; // } // Record the common chunk. //DEBUG*/ trace ( ' Common chunk found', xShort, xLong, xLen ); //DEBUG*/ trace ( ' "' + shortString.substr ( xShort, xLen ) + '"' ); flag = false; overlapLeft = 0; overlapRight = 0; for ( i = 0; i <= numChunks; i += 3 ) // Vector format : ... , index in shortString , index in longString , chunk length , ... { if ( i == numChunks ) { if ( !flag ) { chunks [ i ] = xShort; chunks [ int ( i + 1 ) ] = xLong; chunks [ int ( i + 2 ) ] = xLen; break; } } else if ( chunks [ i ] <= xShort ) { if ( chunks [ int ( i + 2 ) ] + chunks [ i ] - xShort > overlapLeft ) overlapLeft = chunks [ int ( i + 2 ) ] + chunks [ i ] - xShort; } else { if ( xLen + xShort - chunks [ i ] > overlapRight ) overlapRight = xShort + xLen - chunks [ i ]; if ( !flag ) { flag = true; chunks.splice ( i, 0, xShort, xLong, xLen ); i += 3; } } } numChunks += 3; //DEBUG*/ if ( overlapLeft + overlapRight ) //DEBUG*/ trace ( ' Pattern overlaps: [ ' + overlapLeft + ' > ... < ' + overlapRight + ' ] (pattern length : ' + xLen + ')' ); overlapLeft += overlapRight; if ( overlapLeft < xLen ) { if ( searchScope + overlapLeft - xLen >= 0 ) searchScope += overlapLeft - xLen; else break; } // Proceed. l = Math.log ( searchScope ); threshold = l * l + 0.99; fails = 0; // iTable = 0; } else fails ++; } //BENCH*/ trace ( '### SCANNER TOOK ', ( getTimer () - t ), ' MILLISEC' ); if ( numChunks ) { // // Sort chunks by length. // Note : Chunks are already sorted by index in short text, thus the resulting sort is length desc, index-in-short asc. var j : int; for ( i = 3; i < numChunks; i += 3 ) { j = i + 2; xLen = chunks [ j ]; while ( j - 3 > 0 && chunks [ int ( j - 3 ) ] < xLen ) j -= 3; if ( j < i ) { j -= 2; a = chunks [ i ]; b = chunks [ int ( i + 1 ) ]; c = chunks [ int ( i + 2 ) ]; chunks.splice ( i, 3 ); chunks.splice ( j, 0, a, b, c ); } } //BENCH*/ t = getTimer (); // // Solve. var result : Array = [], yShort : uint, yLong : uint, yLen : uint; result [ 0 ] = chunks [ 0 ]; result [ 1 ] = chunks [ 1 ]; result [ 2 ] = chunks [ 2 ]; a = 3; solver : for ( i = 3; i < numChunks; i += 3 ) { xShort = chunks [ i ]; xLong = chunks [ int ( i + 1 ) ]; xLen = chunks [ int ( i + 2 ) ]; check : for ( j = a - 3; j >= 0; j -= 3 ) { yShort = result [ j ]; yLong = result [ int ( j + 1 ) ]; yLen = result [ int ( j + 2 ) ]; if ( !( xShort >= yShort + yLen && xLong >= yLong + yLen ) && !( yShort >= xShort + xLen && yLong >= xLong + xLen ) ) { // Common substrings conflict. //DEBUG*/ trace ( 'Solver : A conflict exists between :' ); //DEBUG*/ trace ( ' ', xShort, xLong, xLen ); //DEBUG*/ trace ( ' "' + shortString.substr ( xShort, xLen ) + '" / "' + longString.substr ( xLong, xLen ) + '"' ); //DEBUG*/ trace ( ' ', yShort, yLong, yLen ); //DEBUG*/ trace ( ' "' + shortString.substr ( yShort, yLen ) + '" / "' + longString.substr ( yLong, yLen ) + '"' ); if ( xShort > yShort && xLong > yLong ) { // Chunks are ordered correctly : the i chunk comes after the chunk at j in both texts. b = Math.max ( yShort + yLen - xShort, yLong + yLen - xLong ); //DEBUG*/ trace ( ' Correctable conflict, left-overlap. Trimming by ' + b + ' (chunk length = ' + xLen + ')' ); if ( xLen - b >= minWord ) { // Left-trim the i (shorter) chunk to resolve the conflict. xShort += b; xLong += b; xLen -= b; continue check; } //DEBUG*/ else //DEBUG*/ trace ( ' ... Difference too short, discarding.' ); } else if ( xShort < yShort && xLong < yLong ) { // Chunks are ordered correctly : the i chunk comes before the chunk at j in both texts. b = Math.max ( xShort + xLen - yShort, xLong + xLen - yLong ); //DEBUG*/ trace ( ' Correctable conflict, right-overlap. Trimming by ' + b + ' (chunk length = ' + xLen + ')' ); if ( xLen - b >= minWord ) { // Right-trim the i (shorter) chunk to resolve the conflict. xLen -= b; continue check; } //DEBUG*/ else //DEBUG*/ trace ( ' ... Difference too short, discarding.' ); } else { // Chunks' order in the two texts is in conflict. //DEBUG*/ trace ( ' Chunks are badly ordered. Searching for alternative matches ...' ); if ( xShort > yShort ) { // Search for the modified string to the right. b = yLong + yLen; c = longString.indexOf ( shortString.substr ( xShort, xLen ), b ) + 1; if ( c > 0 ) { //DEBUG*/ trace ( ' Found ! Chunk saved. Moved from ' + xLong + ' to ' + ( c - 1 ) ); // Re-solve for this chunk. xLong = c - 1; j = a; continue check; } //DEBUG*/ else //DEBUG*/ trace ( ' xShort > yShort, but nothing found.' ); } //DEBUG*/ else //DEBUG*/ trace ( ' xShort <= yShort.' ); } continue solver; } } //DEBUG*/ trace ( 'Recording chunk "' + shortString.substr ( xShort, xLen ) + '".' ); result [ a ] = xShort; result [ int ( ++ a ) ] = xLong; result [ int ( ++ a ) ] = xLen; a ++; } //BENCH*/ trace ( '### SOLVER TOOK ', ( getTimer () - t ), ' MILLISEC' ); // // Finalise the solution. numChunks = a; flag = modifiedLen <= originalLen; //BENCH*/ searchScope = result [ 2 ]; //BENCH*/ t = getTimer (); for ( i = 0; i < numChunks; i += 3 ) { // Remap the chunks to [ ... , original-text-index , modified-text-index , chunk-length , ... ] if needed. if ( flag ) { a = result [ i ]; result [ i ] = result [ int ( i + 1 ) ]; result [ int ( i + 1 ) ] = a; } // Semantic trimmer. if ( semanticTrim ) { xShort = result [ i ]; xLong = result [ int ( i + 1 ) ]; xLen = result [ int ( i + 2 ) ]; //DEBUG*/ trace ( 'Semantic trimmer :\n ', xShort, xLong, xLen, '\n "' + originalString.substr ( xShort, xLen ) + '" / "' + modifiedString.substr ( xLong, xLen ) + '"' ); var semanticThreshold : int = minWord * 2; if ( semanticThreshold > xLen - 1 ) semanticThreshold = xLen - 1; if ( xShort && xLong && ( !intraword.test ( originalString.substr ( xShort - 1, 2 ) ) || !intraword.test ( modifiedString.substr ( xLong - 1, 2 ) ) ) ) { a = semanticThreshold; do { if ( a == 0 ) // Cancel. { if ( xLen >= minWord ) { //DEBUG*/ a = 0; xLen += semanticThreshold; xShort -= semanticThreshold; xLong -= semanticThreshold; } break; } xLen --; xShort ++; xLong ++; a --; } while ( !intraword.test ( originalString.charAt ( xShort ) ) || !intraword.test ( modifiedString.charAt ( xLong ) ) ); //DEBUG*/ if ( a ) //DEBUG*/ trace ( ' ... chopped', ( semanticThreshold - a ), 'characters from the left of the chunk.' ); //DEBUG*/ else //DEBUG*/ trace ( ' ... cancels out on the left of the chunk.' ); } if ( semanticThreshold > xLen - 1 ) semanticThreshold = xLen - 1; if ( ( xShort + xLen < originalLen ) && ( xLong + xLen < modifiedLen ) && ( !intraword.test ( originalString.substr ( xShort + xLen - 1, 2 ) ) || !intraword.test ( modifiedString.substr ( xLong + xLen - 1, 2 ) ) ) ) { a = semanticThreshold; do { if ( a == 0 ) // Cancel. { if ( xLen >= minWord ) { //DEBUG*/ a = 0; xLen += semanticThreshold; } break; } xLen --; a --; } while ( !intraword.test ( originalString.charAt ( xShort + xLen - 1 ) ) || !intraword.test ( modifiedString.charAt ( xLong + xLen - 1 ) ) ); //DEBUG*/ if ( a ) //DEBUG*/ trace ( ' ... chopped', ( semanticThreshold - a ), 'characters from the right of the chunk.' ); //DEBUG*/ else //DEBUG*/ trace ( ' ... cancels out on the right of the chunk.' ); } if ( xLen < minWord ) { //DEBUG*/ trace ( ' Discarding chunk', xShort, xLong, xLen ); //DEBUG*/ trace ( ' "' + shortString.substr ( xShort, xLen ) + '"' ); result.splice ( i, 3 ); i -= 3; numChunks -= 3; continue; } result [ i ] = xShort; result [ int ( i + 1 ) ] = xLong; result [ int ( i + 2 ) ] = xLen; } if ( i == 0 ) continue; // Sort by location. j = i; //BENCH*/ searchScope += result [ int ( i + 2 ) ]; while ( j - 3 >= 0 && result [ j - 3 ] > result [ i ] ) j -= 3; if ( j < i ) { a = result [ i ]; b = result [ int ( i + 1 ) ]; c = result [ int ( i + 2 ) ]; result.splice ( i, 3 ); result.splice ( j, 0, a, b, c ); } } //BENCH*/ trace ( '### SEMANTICS TOOK ', ( getTimer () - t ), ' MILLISEC' ); // Cleanup. chunks.length = 0; } else { // No commonalities. //BENCH*/ searchScope = 0; return []; } // Success ! //BENCH*/ time = getTimer () - time; //BENCH*/ trace ( '# DIFF TOOK ', time + ' MILLISEC / MATCHED ' + int ( ( searchScope ) / shorterLen * 100 + 0.5 ) + '%\n' ); return result; } //// //// //// //// Public. /** * Stores the patch data in a compact binary form. */ public var data : ByteArray; /** * The length of context margins appended to the sides of each chunk of patch data. */ public var contextMargin : int = 4; /** * If true, reverses the patch, so that it can reconstruct the 'original' string from the 'modified' string. */ public var reversed : Boolean = false; /** * Diffs two strings and stores the patch. * @param originalString The original string. * @param modifiedString The modified string. * @param semanticTrim Whether to align changes to word-ends or not. */ public function diff ( originalString : String, modifiedString : String, semanticTrim : Boolean = false ) : void { var originalLen : uint = originalString.length, modifiedLen : uint = modifiedString.length; // // Empty the current if ( !data ) data = new ByteArray; else data.length = 0; // // Map commonalities. var map : Array = map ( originalString, modifiedString, semanticTrim ); if ( !map || !map.length ) { data.writeByte ( 0xff ); data.writeUnsignedInt ( 0 ); writeEdit ( 1, originalString ); writeEdit ( 2, modifiedString ); return; } // // Record the patch. var a : uint = 0, b : uint = 0, aIndex : uint, bIndex : uint, cLength : uint, i : int, n : int = map.length, marg : int = contextMargin, marg2 : int = marg + marg, l : int; aIndex = map [ 0 ]; bIndex = map [ 1 ]; cLength = map [ 2 ]; if ( aIndex || bIndex ) { data.writeByte ( 0xff ); data.writeUnsignedInt ( 0 ); if ( aIndex ) writeEdit ( 1, originalString.substring ( 0, aIndex ) ); if ( bIndex ) writeEdit ( 2, modifiedString.substring ( 0, bIndex ) ); } for ( i = 0; i < n; i += 3 ) { if ( cLength > marg2 ) { if ( aIndex || bIndex ) // Close previous context. writeEdit ( 0, originalString.substr ( aIndex, marg ) ); if ( aIndex + cLength < originalLen || bIndex + cLength < modifiedLen ) { // Open next context. data.writeByte ( 0xff ); data.writeUnsignedInt ( aIndex + cLength - marg ); writeEdit ( 0, originalString.substr ( aIndex + cLength - marg, marg ) ); } } else { if ( !aIndex && !bIndex ) { data.writeByte ( 0xff ); data.writeUnsignedInt ( 0 ); } // Write equality. writeEdit ( 0, originalString.substr ( aIndex, cLength ) ); } a = aIndex + cLength; b = bIndex + cLength; if ( i < n - 3 ) { aIndex = map [ int ( i + 3 ) ]; bIndex = map [ int ( i + 4 ) ]; cLength = map [ int ( i + 5 ) ]; } else { aIndex = originalLen; bIndex = modifiedLen; cLength = 0; } if ( a < aIndex ) writeEdit ( 1, originalString.substring ( a, aIndex ) ); if ( b < bIndex ) writeEdit ( 2, modifiedString.substring ( b, bIndex ) ); } } /** * Applies the patch to a supplied string. * @param string The string to which the difference is to be applied. * @return The resulting modified string. */ public function patch ( text : String ) : String { if ( !data || !data.length ) return text; //DEBUG*/ trace ( this + ' @ Patch.patch () : Applying patch ... ' ); var p : uint = data.position, result : String = '', context : String, contextLength : uint, contextMap : Array, firstEdit : uint, chunkIndex : uint, lastChunkEnd : uint, delta : uint, o : int, string : String, a : uint, b : uint, aMapped : uint, bMapped : uint, mapA : Boolean, mapB : Boolean, i : int, n : int; data.position = 0; data.readUnsignedByte (); lastChunkEnd = 0; var O1 : uint = reversed ? 2 : 1; var O2 : uint = reversed ? 1 : 2; while ( data.bytesAvailable ) { // Next chunk. chunkIndex = delta + data.readUnsignedInt (); // Get the total context. context = ''; firstEdit = data.position; while ( data.bytesAvailable && ( o = data.readUnsignedByte () ) != 0xff ) { string = data.readUTF (); if ( o != O2 ) context += string; } contextLength = context.length; // Locate the chunk in the text. if ( text.indexOf ( context, chunkIndex ) == chunkIndex ) { //DEBUG*/ trace ( ' Context\n "' + context + '"\n matched exactly.' ); // Exact match. contextMap = null; } else { //DEBUG*/ trace ( ' Context\n "' + context + '"\n not found where expected. Performing a fuzzy search ...' ); // Perform an approximate string match for the context. var match : uint; if ( contextLength <= bitap.maxPatternLength ) { // Pattern is good for a bitap search. match = bitap.match ( text, context, chunkIndex ) + 1; if ( !match ) //DEBUG*/ { //DEBUG*/ trace ( ' [small pattern] Failed to find context.' ); continue; //DEBUG*/ } a = match - 1; contextMap = map ( context, text.substr ( a, contextLength ) ); // Adjust expectations about following chunks' locations. delta += a - chunkIndex; } else { // Pattern is longer than the supported by the bitap search engine. match = bitap.match ( text, context.substr ( 0, bitap.maxPatternLength ), chunkIndex ) + 1; if ( !match ) //DEBUG*/ { //DEBUG*/ trace ( ' [big pattern] Failed to find context start.' ); continue; //DEBUG*/ } a = match - 1; match = bitap.match ( text, context.substr ( contextLength - bitap.maxPatternLength, bitap.maxPatternLength ), a + contextLength - bitap.maxPatternLength ) + 1; if ( !match ) //DEBUG*/ { //DEBUG*/ trace ( ' [big pattern] Failed to find context end.' ); continue; //DEBUG*/ } b = match - 1 + bitap.maxPatternLength; contextMap = map ( context, text.substr ( a, b ) ); // Adjust expectations about following chunks' locations. delta += ( a - chunkIndex ) + b - ( a + contextLength ); } chunkIndex = a; } // Append the intrachunk text. if ( chunkIndex - lastChunkEnd > 0 ) result += text.substring ( lastChunkEnd, chunkIndex ); // Perform the edits. a = 0; data.position = firstEdit; n = contextMap ? contextMap.length : 0; while ( data.bytesAvailable && ( o = data.readUnsignedByte () ) != 0xff ) { mapA = true; string = data.readUTF (); if ( o != O2 ) { mapB = true; b = a + string.length; } else { mapB = false; b = a; } aMapped = a; bMapped = b; // Map the coordinates of the edit to what was found from the context. for ( i = 0; i < n; i += 3 ) { if ( mapA ) { if ( contextMap [ i ] <= a && contextMap [ i ] + contextMap [ int ( i + 2 ) ] >= a ) { mapA = false; aMapped = a + contextMap [ int ( i + 1 ) ] - contextMap [ i ]; } else if ( contextMap [ i ] > a ) { mapA = false; if ( o != 0 ) aMapped = contextMap [ int ( i + 1 ) ]; else if ( i > 0 ) aMapped = contextMap [ int ( i - 2 ) ] + contextMap [ int ( i - 1 ) ]; else aMapped = 0; } } if ( mapB ) { if ( contextMap [ i ] <= b && contextMap [ i ] + contextMap [ int ( i + 2 ) ] >= b ) { mapB = false; bMapped = b + contextMap [ int ( i + 1 ) ] - contextMap [ i ]; } else if ( contextMap [ i ] > b ) { mapB = false; if ( o == 0 ) bMapped = contextMap [ int ( i + 1 ) ]; else if ( i > 0 ) bMapped = contextMap [ int ( i - 2 ) ] + contextMap [ int ( i - 1 ) ]; else bMapped = 0; } } } aMapped += chunkIndex; bMapped += chunkIndex; if ( bMapped >= aMapped ) { // Perform this edit. if ( o == 0 ) result += text.substring ( aMapped, bMapped ); else if ( o == O2 ) result += string; } a = b; } lastChunkEnd = bMapped; } // Append the rest of the text. if ( lastChunkEnd < text.length ) result += text.substring ( lastChunkEnd ); // Clean-up and return result. if ( contextMap ) contextMap.length = 0; data.position = p; return result; } /** * Gets and sets patch data in the google-diff-match-patch pseudo-GNU diff format. * * Unlike google-diff-match-patch, here the resulting string is expected * to be communicated over a utf-8 enabled connection. Hence, only newline * characters and '%' are URI encoded, which however should remain * compatible with the google-diff-match-patch parser for patch data. */ public function get string () : String { if ( !data || !data.length ) return ''; var result : String = '', edits : String = '', p : uint = data.position, o : int, start : uint = 0, delta : int = 0, aLen : uint, bLen : uint, str : String, strLen : uint; data.position = 0; while ( data.bytesAvailable ) { o = data.readUnsignedByte (); if ( o == 0xff ) { // Close previous chunk. if ( edits.length ) { result += '@@ -' + start + ',' + aLen + ' +' + ( start + delta ) + ',' + bLen + ' @@\n'; result += edits; } edits = ''; delta += bLen - aLen; aLen = 0; bLen = 0; // Begin new chunk. start = data.readUnsignedInt () + 1; } else { str = data.readUTF (); strLen = str.length; // Escape the string. if ( str.indexOf ( '%' ) > -1 ) str = str.split ( '%' ).join ( '%25' ); if ( str.indexOf ( '\n' ) > -1 ) str = str.split ( '\n' ).join ( '%0A' ); if ( str.indexOf ( '\r' ) > -1 ) str = str.split ( '\r' ).join ( '%0D' ); // Record the edit. if ( o == 0 ) { edits += ' ' + str + '\n'; aLen += strLen; bLen += strLen; } else if ( o == 1 ) { edits += '-' + str + '\n'; aLen += strLen; } else if ( o == 2 ) { edits += '+' + str + '\n'; bLen += strLen; } } } if ( edits.length ) { // Close last chunk. result += '@@ -' + start + ',' + aLen + ' +' + ( start + delta ) + ',' + bLen + ' @@\n'; result += edits.substr ( 0, edits.length - 1 ); // Strip last newline. } data.position = p; return result; } public function set string ( patch : String ) : void { if ( !data ) data = new ByteArray; else data.length = 0; var matchData : Object, aIndex : uint, bIndex : uint, aLength : uint, bLength : uint, delta : int, a : uint, b : uint, edits : String, opstr : String, opcode : uint, string : String, strlen : uint; //DEBUG*/ trace ( this + " @ Patch.[set]string () : Running parser ..." ); chunkPattern.lastIndex = 0; while ( matchData = chunkPattern.exec ( patch ) ) { // // Parse the chunk. aIndex = matchData [ 1 ]; aLength = matchData [ 2 ]; bIndex = matchData [ 3 ]; bLength = matchData [ 4 ]; edits = matchData [ 5 ]; //DEBUG*/ trace ( " Reading chunk ...\n -", aIndex, aLength, "+", bIndex, bLength ); if ( bIndex - aIndex != delta ) { //DEBUG*/ trace ( " [!] The difference between aIndex and bIndex does not match the current delta." ); // Try to fix the indeces. if ( aIndex < bIndex ) bIndex = aIndex + delta; else aIndex = bIndex - delta; } // Provided indexes are 1-based. aIndex --; bIndex --; // Record chunk. data.writeByte ( 0xff ); data.writeUnsignedInt ( aIndex ); a = 0; b = 0; editPattern.lastIndex = 0; while ( matchData = editPattern.exec ( edits ) ) { opstr = matchData [ 1 ]; string = decodeURIComponent ( matchData [ 2 ] ); strlen = string.length; //DEBUG*/ trace ( " " + opstr, string ); if ( opstr == '-' ) { opcode = 1; a += strlen; } else if ( opstr == '+' ) { opcode = 2; b += strlen; } else { opcode = 0; a += strlen; b += strlen; } // Record edit. writeEdit ( opcode, string ); } // Keep track of delta. if ( aLength != a ) //DEBUG*/ { //DEBUG*/ trace ( " [!] Provided aLength was wrong!" ); aLength = a; //DEBUG*/ } if ( bLength != b ) //DEBUG*/ { //DEBUG*/ trace ( " [!] Provided bLength was wrong!" ); bLength = b; //DEBUG*/ } delta += bLength - aLength; } } /** * Writes edits to the ByteArray, accounting for the string length limitation of 65535 characters. If string is longer, the edit is chopped into a sequence of shorter edits with the same opcode. * @param opcode The edit opcode - equality, insertion or deletion. * @param string The string to be written. */ private function writeEdit ( opcode : uint, string : String ) : void { var x1 : int = 0, x2 : int = 0, l : int = string.length; if ( l <= 65535 ) { data.writeByte ( opcode ); data.writeUTF ( string ); return; } while ( x1 < l ) { x2 = x1 + 65535; if ( x2 > l ) x2 = l; data.writeByte ( opcode ); data.writeUTF ( string.substring ( x1, x2 ) ); x1 = x2; } } /*// You don't need this compiled in production code. //// //// //// Demo. private var tfE : TextFormat; private var tfD : TextFormat; private var tfI : TextFormat; // // Shows off the diff in a text field. // @param originalString The original (unmodified) text, from which the patch was generated. // @param textField A textField, in which the changes will be outlined. // public function show ( originalString : String, textField : TextField ) : void { if ( !data || !data.length ) { textField.text = originalString; return; } // // Build the text. var p : uint = data.position, a : uint, str : String, len : uint, o : int, result : String = '', formatting : Array = [], f : int = 0, c : int = -1, i : uint = 0; data.position = 0; while ( data.bytesAvailable ) { o = data.readUnsignedByte (); if ( o == 0xff ) { if ( c != 0 ) { c = 0; formatting [ f ++ ] = i; formatting [ f ++ ] = c; } result += originalString.substring ( a, a = data.readUnsignedInt () ); i = result.length; } else { str = data.readUTF (); len = str.length; if ( c != o ) { c = o; formatting [ f ++ ] = i; formatting [ f ++ ] = c; } if ( o == 0 || o == 1 ) { a += len; } i += len; result += str; } } if ( a < originalString.length ) { if ( c != 0 ) { formatting [ f ++ ] = i; formatting [ f ++ ] = 0; } result += originalString.substring ( a ); } // // Apply formatting. textField.text = result; if ( !tfE ) { tfE = new TextFormat ( 'Lucida Sans Unicode', 12, 0, false, false, false ); tfD = new TextFormat ( 'Lucida Sans Unicode', 12, 0xaaaaaa, false, false, true ); tfI = new TextFormat ( 'Lucida Sans Unicode', 12, 0x2277dd, false, false, true ); } formatting [ f ++ ] = i; formatting [ f ++ ] = 0; var format : TextFormat; for ( i = 2; i < f; i += 2 ) { o = formatting [ int ( i - 1 ) ]; if ( o == 0 ) format = tfE; else if ( o == 1 ) format = tfD; else if ( o == 2 ) format = tfI; if ( formatting [ i ] > formatting [ int ( i - 2 ) ] ) textField.setTextFormat ( format, formatting [ int ( i - 2 ) ], formatting [ i ] ); } data.position = p; } //// //// //*/ } }