// // RegexKitLite.m // http://regexkit.sourceforge.net/ // Licensed under the terms of the BSD License, as specified below. // /* Copyright (c) 2008, John Engelhart All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Zang Industries nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #import #import #import #import #import #import #import #import #import #ifdef __OBJC_GC__ #import #endif #import #import #import #import #import #import #import "RegexKitLite.h" // Compile time tuneables. 
// Number of entries in the RKLCache array. A regex is mapped to a slot with (CFHash(regexString) % RKL_CACHE_SIZE).
#ifndef RKL_CACHE_SIZE
#define RKL_CACHE_SIZE 23
#endif

// Number of UniChar elements in the fixed UTF-16 conversion buffer. Strings whose length is below this value use the
// fixed buffer, all others use the dynamically sized buffer.
#ifndef RKL_FIXED_LENGTH
#define RKL_FIXED_LENGTH 2048
#endif

// Size limit, in bytes, below which temporary storage is taken from the stack with alloca() instead of the heap.
#ifndef RKL_STACK_LIMIT
#define RKL_STACK_LIMIT (128 * 1024)
#endif

// Number of entries in the scratchBuffer array of temporary heap allocations.
#define SCRATCH_BUFFERS 4

// These macros are nearly identical to their NSCParameterAssert siblings.
// This is required because nearly everything is done while cacheSpinLock is locked.
// We need to safely unlock before throwing any of these exceptions.
// @try {} @finally {} significantly slows things down so it's not used.

// RKLCAssert(d, ...) does not raise anything. It evaluates to the value returned by RKLCAssertDictionary(), which is
// handed the current function, file and line along with the format string 'd' and its arguments.
#define RKLCAssert(d, ...) RKLCAssertDictionary(__PRETTY_FUNCTION__, __FILE__, __LINE__, (d), ##__VA_ARGS__)
#ifdef NS_BLOCK_ASSERTIONS
// Assertions are compiled out: the delayed assert expands to nothing and the condition 'c' is never evaluated.
#define _RKLCDelayedAssertBody(c, e, g, d, ...)
#else
// c = condition, e = pointer to the 'id' that receives the failure, g = label to jump to, d = format string.
// If *e is already set, jump to 'g' without evaluating 'c'. Otherwise, if 'c' is false, store the RKLCAssert() result
// in *e and jump to 'g'. Note that 'e' itself is dereferenced unconditionally.
#define _RKLCDelayedAssertBody(c, e, g, d, ...) do { id *_e=(e); if(*_e!=NULL) { goto g; } if(!(c)) { *_e = RKLCAssert((d), ##__VA_ARGS__); goto g; } } while(0)
#endif // NS_BLOCK_ASSERTIONS
// The form used throughout this file: the description is the stringified condition.
#define RKLCDelayedAssert(c, e, g) _RKLCDelayedAssertBody(c, e, g, @"Invalid parameter not satisfying: %s", #c)

// Raises an NSException named 'e' immediately. The reason is built by RKLStringFromClassAndMethod() from the format 'f'.
// Requires 'self' and '_cmd' to be in scope at the point of use.
#define RKLRaiseException(e, f, ...) [[NSException exceptionWithName:(e) reason:RKLStringFromClassAndMethod((self), (_cmd), (f), ##__VA_ARGS__) userInfo:NULL] raise]

// Ugly macros to keep other parts clean.
// Macro versions of the NSRange helpers used by this file. Arguments may be evaluated more than once.

// One past the last index covered by 'r'.
#define NSMaxRange(r) ((r).location + (r).length)
// True when 'in' lies entirely inside 'win'. Both tests are written as "offset from win.location <= win.length".
#define NSRangeInsideRange(in, win) (((((in).location - (win).location) <= (win).length) && ((NSMaxRange(in) - (win).location) <= (win).length)))
// True when both the location and the length of the two ranges match.
#define NSEqualRanges(r1, r2) ((((r1).location == (r2).location) && ((r1).length == (r2).length)))
#define NSMakeRange(loc, len) ((NSRange){(NSUInteger)(loc), (NSUInteger)(len)})
#define CFMakeRange(loc, len) ((CFRange){ (CFIndex)(loc), (CFIndex)(len)})
// The "no match" / "not set" sentinel used throughout the cache code.
#define NSNotFoundRange ((NSRange){NSNotFound, 0 })
// A range starting at 0 with the largest representable length.
#define NSMaxiumRange ((NSRange){ 0, NSUIntegerMax})

#if defined (__GNUC__) && (__GNUC__ >= 4)
// Issues two prefetches: one for the address (ptr + off + 64) bytes, and one for the address 64 bytes after that.
#define RKL_PREFETCH(ptr, off) { const char *p = ((const char *)(ptr)) + ((off) + 64); __builtin_prefetch(p); __builtin_prefetch(p + 64); }
#else
// No prefetch builtin available: expands to nothing.
#define RKL_PREFETCH(ptr, off)
#endif

// If the gcc flag -mmacosx-version-min is used with, for example, '=10.2', give a warning that the libicucore.dylib is only available on >= 10.3.
// If you are reading this comment because of this warning, this is to let you know that linking to /usr/lib/libicucore.dylib will cause your executable to fail on < 10.3.
// You will need to build your own version of the ICU library and link to that in order for RegexKitLite to work successfully on < 10.3. This is not simple.
#if MAC_OS_X_VERSION_MIN_REQUIRED < 1030
#warning The ICU dynamic shared library, /usr/lib/libicucore.dylib, is only available on Mac OS X 10.3 and later.
#warning You will need to supply a version of the ICU library to use RegexKitLite on Mac OS X 10.2 and earlier.
#endif

// re = ICU regex, s = pointer to the int32_t status, c = capture group number, r = NSRange lvalue that receives the result.
// When uregex_start() returns -1, 'r' is set to NSNotFoundRange. Otherwise 'r' becomes { start, uregex_end() - start }.
// This is a GCC statement expression whose value is *(s), so a caller can test the status in the same expression.
#define RKLGetRangeForCapture(re, s, c, r) ({ int32_t start = uregex_start((re), (int32_t)(c), (s)); if(start == -1) { r = NSNotFoundRange; } else { r.location = (NSUInteger)start; r.length = (NSUInteger)uregex_end((re), (int32_t)(c), (s)) - r.location; } *(s); })

// Exported symbols. Exception names, error domains, keys, etc.
// Each constant's value is simply its own name without the trailing 'ErrorKey' / 'Key' suffix where one is present.
NSString * const RKLICURegexException = @"RKLICURegexException";
NSString * const RKLICURegexErrorDomain = @"RKLICURegexErrorDomain";
NSString * const RKLICURegexErrorCodeErrorKey = @"RKLICURegexErrorCode";
NSString * const RKLICURegexErrorNameErrorKey = @"RKLICURegexErrorName";
NSString * const RKLICURegexLineErrorKey = @"RKLICURegexLine";
NSString * const RKLICURegexOffsetErrorKey = @"RKLICURegexOffset";
NSString * const RKLICURegexPreContextErrorKey = @"RKLICURegexPreContext";
NSString * const RKLICURegexPostContextErrorKey = @"RKLICURegexPostContext";
NSString * const RKLICURegexRegexErrorKey = @"RKLICURegexRegex";
NSString * const RKLICURegexRegexOptionsErrorKey = @"RKLICURegexRegexOptions";

// Type / struct definitions

typedef struct uregex uregex; // Opaque ICU regex type.

// Local stand-ins for the ICU definitions this file needs. NOTE(review): these must agree with the ICU library that is
// actually linked -- confirm against the ICU headers.
#define U_BUFFER_OVERFLOW_ERROR 15
#define U_PARSE_CONTEXT_LEN 16

// Filled in by uregex_open() (it is passed as the parseError argument) to describe where a pattern failed to parse.
typedef struct UParseError {
  int32_t line;
  int32_t offset;
  UniChar preContext[U_PARSE_CONTEXT_LEN];
  UniChar postContext[U_PARSE_CONTEXT_LEN];
} UParseError;

// Operation codes passed to performRegexOp(). The operation is (value & RKLMaskOp); RKLReplaceMutable is a flag bit
// that is OR'd in on top of the operation.
enum {
  RKLSplitOp = 1,
  RKLReplaceOp = 2,
  RKLRangeOp = 3,
  RKLMaskOp = 0xf,
  RKLReplaceMutable = 1 << 4,
};
typedef NSUInteger RKLRegexOp;

// A UTF-16 conversion buffer together with the identity of the string currently converted into it.
typedef struct {
  CFStringRef string; // The string whose characters are in uniChar. Retained by setCacheSlotToString().
  CFHashCode hash;    // Copied from the cache slot's setToHash when the buffer is filled.
  CFIndex length;     // Copied from the cache slot's setToLength when the buffer is filled.
  UniChar *uniChar;   // The UTF-16 characters.
} RKLBuffer;

// One entry of the regex cache: a compiled ICU regex plus the state of the string it is currently "set to".
typedef struct {
  CFStringRef regexString;          // Copy of the pattern, made with CFStringCreateCopy().
  RKLRegexOptions options;          // Options the pattern was compiled with.
  uregex *icu_regex;                // Result of uregex_open().
  NSInteger captureCount;           // Result of uregex_groupCount().
  CFStringRef setToString;          // The string being searched. Retained with CFRetain().
  CFHashCode setToHash;             // CFHash() of setToString at the time it was set.
  CFIndex setToLength;              // Length of setToString at the time it was set.
  NSUInteger setToIsImmutable:1;    // !rkl_CFStringIsMutable(setToString).
  NSUInteger setToNeedsConversion:1; // Set when CFStringGetCharactersPtr() returned NULL for setToString.
  const UniChar *setToUniChar;      // UTF-16 characters of setToString: either the direct pointer or a conversion buffer.
  NSRange setToRange, lastFindRange, lastMatchRange; // Range given to uregex_setText(), last successful search range, last match of capture 0.
  NSUInteger pad[1]; // For 32 bits, this makes the struct 64 bytes exactly, which is good for cache line alignment.
} RKLCacheSlot;

// ICU functions. See http://www.icu-project.org/apiref/icu4c/uregex_8h.html Tweaked slightly from the originals, but functionally identical.
// The 'status' arguments below are ICU error codes. Throughout this file a value greater than zero is treated as failure.
const char *u_errorName (int32_t status);
int32_t u_strlen (const UniChar *s);
int32_t uregex_appendReplacement (uregex *regexp, const UniChar *replacementText, int32_t replacementLength, UniChar **destBuf, int32_t *destCapacity, int32_t *status);
int32_t uregex_appendTail (uregex *regexp, UniChar **destBuf, int32_t *destCapacity, int32_t *status);
void uregex_close (uregex *regexp);
int32_t uregex_end (uregex *regexp, int32_t groupNum, int32_t *status);
BOOL uregex_find (uregex *regexp, int32_t location, int32_t *status);
BOOL uregex_findNext (uregex *regexp, int32_t *status);
int32_t uregex_groupCount (uregex *regexp, int32_t *status);
uregex *uregex_open (const UniChar *pattern, int32_t patternLength, RKLRegexOptions flags, UParseError *parseError, int32_t *status);
void uregex_reset (uregex *regexp, int32_t newIndex, int32_t *status);
void uregex_setText (uregex *regexp, const UniChar *text, int32_t textLength, int32_t *status);
int32_t uregex_split (uregex *regexp, UniChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UniChar *destFields[], int32_t destFieldsCapacity, int32_t *status);
int32_t uregex_start (uregex *regexp, int32_t groupNum, int32_t *status);

// Forward declarations for the functions private to this file.
// Cache lookup and set up.
static RKLCacheSlot *getCachedRegex (NSString *regexString, RKLRegexOptions options, NSError **error, id *exception);
static BOOL setCacheSlotToString (RKLCacheSlot *cacheSlot, const NSRange *range, int32_t *status, id *exception);
static RKLCacheSlot *getCachedRegexSetToString (NSString *regexString, RKLRegexOptions options, NSString *matchString, NSUInteger *matchLengthPtr, NSRange *matchRange, NSError **error, id *exception, int32_t *status);
// The common entry point and the per-operation workers it dispatches to.
static id performRegexOp (id self, SEL _cmd, RKLRegexOp doRegexOp, NSString *regexString, RKLRegexOptions options, NSInteger capture, id matchString, NSRange *matchRange, NSString *replacementString, NSError **error, void **result);
static void rkl_find (RKLCacheSlot *cacheSlot, NSInteger capture, NSRange searchRange, NSRange *resultRange, id *exception, int32_t *status);
static NSArray *rkl_splitArray (RKLCacheSlot *cacheSlot, id *exception, int32_t *status);
static NSString *rkl_replaceString (RKLCacheSlot *cacheSlot, id searchString, NSUInteger searchU16Length, NSString *replacementString, NSUInteger replacementU16Length, NSUInteger *replacedCount, int replaceMutable, id *exception, int32_t *status);
static int32_t rkl_replaceAll (RKLCacheSlot *cacheSlot, const UniChar *replacementUniChar, int32_t replacementU16Length, UniChar *replacedUniChar, int32_t replacedU16Capacity, NSUInteger *replacedCount, id *exception, int32_t *status);
// Cache and buffer clearing.
static void rkl_clearStringCache (void);
static void clearBuffer (RKLBuffer *buffer, int freeDynamicBuffer);
static void clearCacheSlotRegex (RKLCacheSlot *cacheSlot);
static void clearCacheSlotSetTo (RKLCacheSlot *cacheSlot);
// Error, exception and assertion construction.
static NSDictionary *userInfoDictionary (NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status, ...);
static NSError *RKLNSErrorForRegex (NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status);
static NSException *RKLNSExceptionForRegex (NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status);
static NSDictionary *RKLCAssertDictionary (const char *function, const char *file, int line, NSString *format, ...);
static NSString *RKLStringFromClassAndMethod(id object, SEL selector, NSString *format, ...);

#ifdef __OBJC_GC__
// If compiled with Garbage Collection, we need to be able to do a few things slightly differently.
// The basic premise is that under GC we use a trampoline function pointer which is set to a _start function to catch the first invocation.
// The _start function checks if GC is running and then overwrites the function pointer with the appropriate routine. Think of it as 'lazy linking'.

// rkl_collectingEnabled uses objc_getClass() to get the NSGarbageCollector class, which doesn't exist on earlier systems.
// This allows for graceful failure should we find ourselves running on an earlier version of the OS without NSGarbageCollector.

// rkl_collectingEnabled starts out pointing at the _first variant. On its first call, _first replaces the pointer with
// either _yes or _no, depending on whether [NSGarbageCollector defaultCollector] is non-NULL, then calls the replacement.
static BOOL rkl_collectingEnabled_first (void);
static BOOL rkl_collectingEnabled_yes (void) { return(YES); }
static BOOL rkl_collectingEnabled_no (void) { return(NO); }
static BOOL(*rkl_collectingEnabled) (void) = rkl_collectingEnabled_first;
static BOOL rkl_collectingEnabled_first (void) { return((([objc_getClass("NSGarbageCollector") defaultCollector]!=NULL) ? (rkl_collectingEnabled=rkl_collectingEnabled_yes) : (rkl_collectingEnabled=rkl_collectingEnabled_no))()); }

// rkl_realloc(ptr, size, flags) resizes the allocation at *ptr and returns the new address.
//  _std: reallocf() the block and store the result back in *ptr. 'flags' is ignored.
//  _gc:  with flags == 0 it behaves like _std. With flags != 0 it returns a fresh NSAllocateCollectable() block instead,
//        frees whatever *ptr pointed to and sets *ptr to NULL. The old contents are not copied in that case.
// The trampoline picks _gc or _std on the first call, based on rkl_collectingEnabled().
static void *rkl_realloc_first (void **ptr, size_t size, NSUInteger flags);
static void *rkl_realloc_std (void **ptr, size_t size, NSUInteger flags) { flags=flags; /*unused*/ return((*ptr = reallocf(*ptr, size))); }
static void *rkl_realloc_gc (void **ptr, size_t size, NSUInteger flags) { void *p=NULL; if(flags!=0) { p=NSAllocateCollectable((NSUInteger)size,flags); if(*ptr!=NULL) { free(*ptr); *ptr=NULL; } } else { p=*ptr=reallocf(*ptr, size); } return(p); }
static void *(*rkl_realloc) (void **ptr, size_t size, NSUInteger flags) = rkl_realloc_first;
static void *rkl_realloc_first (void **ptr, size_t size, NSUInteger flags) { return(((rkl_collectingEnabled()==YES) ? (rkl_realloc=rkl_realloc_gc) : (rkl_realloc=rkl_realloc_std))(ptr, size, flags)); }

// rkl_CFAutorelease(obj) hands an owned Core Foundation object over to the caller's memory management model:
// -autorelease when not collecting, CFMakeCollectable() when collecting. Selected on the first call like the others.
static id rkl_CFAutorelease_first (CFTypeRef obj);
static id rkl_CFAutorelease_std (CFTypeRef obj) { return([(id)obj autorelease]); }
static id rkl_CFAutorelease_gc (CFTypeRef obj) { return((id)CFMakeCollectable(obj)); }
static id(*rkl_CFAutorelease) (CFTypeRef obj) = rkl_CFAutorelease_first;
static id rkl_CFAutorelease_first (CFTypeRef obj) { return(((rkl_collectingEnabled()==YES) ? (rkl_CFAutorelease=rkl_CFAutorelease_gc) : (rkl_CFAutorelease=rkl_CFAutorelease_std))(obj)); }

#else // __OBJC_GC__ not defined

// Without GC support compiled in there is nothing to select at run time: these are the plain reallocf() / -autorelease versions.
static void *rkl_realloc (void **ptr, size_t size, NSUInteger flags) { flags=flags; /*unused*/ return((*ptr = reallocf(*ptr, size))); }
static id rkl_CFAutorelease (CFTypeRef obj) { return([(id)obj autorelease]); }

#endif // __OBJC_GC__

#ifdef RKL_FAST_MUTABLE_CHECK
// We use a trampoline function pointer to check at run time if the function __CFStringIsMutable is available.
// If it is, the trampoline function pointer is replaced with the address of that function.
// Otherwise, we assume the worst case that every string is mutable.
// This hopefully helps to protect us since we're using an undocumented, non-public API call.
// We will keep on working if it ever does go away, just with a bit less performance due to the overhead of mutable checks.

static BOOL rkl_CFStringIsMutable_first (CFStringRef str);
static BOOL rkl_CFStringIsMutable_yes (CFStringRef str) { str=str; /*unused*/ return(YES); }
static BOOL(*rkl_CFStringIsMutable) (CFStringRef str) = rkl_CFStringIsMutable_first;
// Looks the symbol up with dlsym(). Falls back to the always-YES version when the lookup returns NULL.
static BOOL rkl_CFStringIsMutable_first (CFStringRef str) { if((rkl_CFStringIsMutable = dlsym(RTLD_DEFAULT, "__CFStringIsMutable")) == NULL) { rkl_CFStringIsMutable = rkl_CFStringIsMutable_yes; } return(rkl_CFStringIsMutable(str)); }
#else // RKL_FAST_MUTABLE_CHECK is not defined. Assume that all strings are potentially mutable.
#define rkl_CFStringIsMutable(s) (YES)
#endif
BOOL __CFStringIsMutable(CFStringRef str);

// Translation unit scope global variables.

static UniChar fixedUniChar[(RKL_FIXED_LENGTH)]; // This is the fixed sized UTF-16 conversion buffer.
// The regex cache itself, and a pointer to the slot that was used most recently (checked first as a fast path).
static RKLCacheSlot RKLCache[(RKL_CACHE_SIZE)], *lastCacheSlot;
// Guards the cache. The functions marked "Should only be called with cacheSpinLock already locked!" rely on it.
static OSSpinLock cacheSpinLock = OS_SPINLOCK_INIT;
// The two UTF-16 conversion buffers. fixedBuffer is backed by fixedUniChar; dynamicBuffer.uniChar is sized on demand.
static RKLBuffer dynamicBuffer, fixedBuffer = {NULL, 0UL, 0L, &fixedUniChar[0]};
static const UniChar emptyUniCharString[1]; // For safety, icu_regexes are 'set' to this when the string they were searched is cleared.
static void *scratchBuffer[(SCRATCH_BUFFERS)]; // Used to hold temporary allocations that are allocated via reallocf().

// These are used when running under manual memory management for the array that rkl_splitArray creates.
// The split strings are created, but not autoreleased. The (immutable) array is created using these callbacks, which skips the CFRetain() call.
// For each split string this saves the overhead of an autorelease, then an array retain, then a autoreleasepool release. This is good for a ~30% speed increase.

static Boolean RKLCFArrayEqualCallBack (const void *value1, const void *value2) { return(CFEqual(value1, value2)); }
static void RKLCFArrayRelease (CFAllocatorRef allocator, const void *ptr) { allocator=allocator;/*unused*/ CFRelease(ptr); }
// Field order is { version, retain, release, copyDescription, equal }. The retain callback is NULL on purpose (see above).
static CFArrayCallBacks transferOwnershipArrayCallBacks = { 0, NULL, RKLCFArrayRelease, NULL, RKLCFArrayEqualCallBack };

#if defined(RKL_REGISTER_FOR_IPHONE_LOWMEM_NOTIFICATIONS) && (RKL_REGISTER_FOR_IPHONE_LOWMEM_NOTIFICATIONS == 1)

// The next few lines are specifically for the iPhone to catch low memory conditions.
// The basic idea is that rkl_RegisterForLowMemoryNotifications() is set to be run once by the linker at load time via __attribute((constructor)).
// rkl_RegisterForLowMemoryNotifications() tries to find the iPhone low memory notification symbol. If it can find it,
// it registers with the default NSNotificationCenter to call the RKLLowMemoryWarningObserver class method +lowMemoryWarning:.
// rkl_RegisterForLowMemoryNotifications() uses an atomic compare and swap to guarantee that it initializes exactly once.
// +lowMemoryWarning tries to acquire the cache lock.
// If it gets the lock, it clears the cache. If it can't, it calls performSelector:
// with a delay of half a second to try again. This will hopefully prevent any deadlocks, such as a RegexKitLite request for
// memory triggering a notification while the lock is held.

static void rkl_RegisterForLowMemoryNotifications(void);

@interface RKLLowMemoryWarningObserver : NSObject +(void)lowMemoryWarning:(id)notification; @end
@implementation RKLLowMemoryWarningObserver
// Clears the string cache if the cache lock can be taken without blocking, otherwise re-schedules itself in 0.5 seconds.
+(void)lowMemoryWarning:(id)notification {
  if(OSSpinLockTry(&cacheSpinLock)) { rkl_clearStringCache(); OSSpinLockUnlock(&cacheSpinLock); }
  else { [[RKLLowMemoryWarningObserver class] performSelector:@selector(lowMemoryWarning:) withObject:NULL afterDelay:0.5]; }
}
@end

static int rkl_HaveRegisteredForLowMemoryNotifications = 0;

// Runs at load time. The compare and swap on rkl_HaveRegisteredForLowMemoryNotifications lets only the first caller through.
// The notification name is looked up with dlsym() so nothing has to be linked against to reference it.
__attribute__((constructor)) static void rkl_RegisterForLowMemoryNotifications(void) {
  void **memoryWarningNotification = NULL;

  if(OSAtomicCompareAndSwapIntBarrier(0, 1, &rkl_HaveRegisteredForLowMemoryNotifications)) {
    if((memoryWarningNotification = dlsym(RTLD_DEFAULT, "UIApplicationDidReceiveMemoryWarningNotification")) != NULL) {
      [[NSNotificationCenter defaultCenter] addObserver:[RKLLowMemoryWarningObserver class] selector:@selector(lowMemoryWarning:) name:*memoryWarningNotification object:NULL];
    }
  }
}

#endif

// IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
// IMPORTANT! Should only be called with cacheSpinLock already locked!
// ----------
//
// Returns the cache slot holding the compiled ICU regex for regexString / options, compiling it if it is not cached.
//
//  regexString  The pattern. Must not be NULL (checked with a delayed assert).
//  options      The regex options. They are part of the cache key.
//  error        Optional. Receives an NSError when ICU reports a failure (status > 0).
//  exception    Required. Receives a delayed assertion, or an NSException when the slot could not be built for a reason
//               that ICU did not report through 'status'.
//
// Returns NULL on any failure. A slot that could not be completely built is cleared again before returning, so a
// half-initialized entry is never handed to the caller and never left behind in the cache.

static RKLCacheSlot *getCachedRegex(NSString *regexString, RKLRegexOptions options, NSError **error, id *exception) {
  // Everything is declared up front so that no 'goto exitNow' jumps over an initializer.
  RKLCacheSlot *cacheSlot            = NULL;
  CFHashCode    regexHash            = 0;
  int32_t       status               = 0;
  CFIndex       regexStringU16Length = 0; // In UTF16 code units.
  UParseError   parseError           = (UParseError){-1, -1, {0}, {0}};
  UniChar      *regexUniChar         = NULL;
  BOOL          compiled             = NO; // Set only once the slot is completely built.

  RKLCDelayedAssert(regexString != NULL, exception, exitNow);

  // Fast path the common case where this regex is exactly the same one used last time.
  if((lastCacheSlot != NULL) && (lastCacheSlot->options == options) && (lastCacheSlot->icu_regex != NULL) && (lastCacheSlot->regexString != NULL) && (lastCacheSlot->regexString == (CFStringRef)regexString)) { return(lastCacheSlot); }

  regexHash = CFHash((CFTypeRef)regexString);
  cacheSlot = &RKLCache[(regexHash % RKL_CACHE_SIZE)]; // Retrieve the cache slot for this regex.

  // Return the cached entry if it's a match, otherwise clear the slot and create a new ICU regex in its place.
  if((cacheSlot->options == options) && (cacheSlot->icu_regex != NULL) && (cacheSlot->regexString != NULL) && ((cacheSlot->regexString == (CFStringRef)regexString) || (CFEqual((CFTypeRef)regexString, cacheSlot->regexString) == YES))) { lastCacheSlot = cacheSlot; return(cacheSlot); }

  clearCacheSlotRegex(cacheSlot);

  if((cacheSlot->regexString = CFStringCreateCopy(NULL, (CFStringRef)regexString)) == NULL) { goto exitNow; } // Get a cheap immutable copy.
  cacheSlot->options   = options;
  regexStringU16Length = CFStringGetLength(cacheSlot->regexString);

  // Try to quickly obtain regexString in UTF16 format.
  if((regexUniChar = (UniChar *)CFStringGetCharactersPtr(cacheSlot->regexString)) == NULL) { // We didn't get the UTF16 pointer quickly and need to perform a full conversion in a temp buffer.
    if((regexStringU16Length * sizeof(UniChar)) < RKL_STACK_LIMIT) { if((regexUniChar = alloca(regexStringU16Length * sizeof(UniChar))) == NULL) { goto exitNow; } } // Try to use the stack.
    else { if((regexUniChar = rkl_realloc(&scratchBuffer[0], regexStringU16Length * sizeof(UniChar), 0UL)) == NULL) { goto exitNow; } } // Otherwise use the heap.
    CFStringGetCharacters(cacheSlot->regexString, CFMakeRange(0, regexStringU16Length), (UniChar *)regexUniChar); // Convert regexString to UTF16.
  }

  // Create the ICU regex.
  if((cacheSlot->icu_regex = uregex_open(regexUniChar, (int32_t)regexStringU16Length, options, &parseError, &status)) == NULL) { goto exitNow; }
  if(status <= 0) { cacheSlot->captureCount = (NSInteger)uregex_groupCount(cacheSlot->icu_regex, &status); }
  if(status <= 0) { lastCacheSlot = cacheSlot; compiled = YES; }

exitNow:
  if(scratchBuffer[0] != NULL) { free(scratchBuffer[0]); scratchBuffer[0] = NULL; }
  if((status > 0) && (error != NULL)) { *error = RKLNSErrorForRegex(regexString, options, &parseError, status); }

  if(compiled == NO) {
    // A failure that ICU did not report (a failed copy or allocation) must not pass silently.
    if((status <= 0) && (exception != NULL) && (*exception == NULL)) { *exception = [NSException exceptionWithName:NSInternalInconsistencyException reason:@"Unable to create the compiled regular expression." userInfo:NULL]; }
    if(cacheSlot != NULL) {
      // Do not leave a partially built entry in the cache, or a fast path pointer to one.
      if(lastCacheSlot == cacheSlot) { lastCacheSlot = NULL; }
      clearCacheSlotRegex(cacheSlot);
      cacheSlot = NULL;
    }
  }

  return(cacheSlot);
}

// IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
// IMPORTANT! Should only be called with cacheSpinLock already locked!
// ----------
//
// Makes sure the slot's ICU regex is set to search 'range' of cacheSlot->setToString.
// When the string needs conversion, its UTF-16 form is first placed in the fixed buffer (length < RKL_FIXED_LENGTH) or
// the dynamic buffer, unless that buffer already holds this string (same pointer, or same length and hash).
// uregex_setText() is only called when 'range' differs from the range the regex is currently set to.
//
// Returns YES on success. Returns NO on a delayed assertion (reported via *exception), on allocation failure, or when
// uregex_setText() reports an error in *status.

static BOOL setCacheSlotToString(RKLCacheSlot *cacheSlot, const NSRange *range, int32_t *status, id *exception) {
  RKLCDelayedAssert((cacheSlot != NULL) && (cacheSlot->setToString != NULL) && (range != NULL) && (status != NULL), exception, exitNow);

  if(cacheSlot->setToNeedsConversion == NO) { goto setRegexText; } // setToUniChar already points directly at the string's characters.

  RKLBuffer *buffer = (cacheSlot->setToLength < RKL_FIXED_LENGTH) ? &fixedBuffer : &dynamicBuffer;
  if((cacheSlot->setToUniChar != NULL) && ((cacheSlot->setToString == buffer->string) || ((cacheSlot->setToLength == buffer->length) && (cacheSlot->setToHash == buffer->hash)))) { goto setRegexText; }

  clearBuffer(buffer, NO);

  if(cacheSlot->setToLength >= RKL_FIXED_LENGTH) {
    RKLCDelayedAssert(buffer == &dynamicBuffer, exception, exitNow);
    if((dynamicBuffer.uniChar = rkl_realloc((void *)&dynamicBuffer.uniChar, (cacheSlot->setToLength * sizeof(UniChar)), 0UL)) == NULL) { return(NO); } // Resize the buffer.
  }
  RKLCDelayedAssert(buffer->uniChar != NULL, exception, exitNow);
  CFStringGetCharacters(cacheSlot->setToString, CFMakeRange(0, cacheSlot->setToLength), (UniChar *)buffer->uniChar); // Convert to a UTF16 string.

  if((buffer->string = CFRetain(cacheSlot->setToString)) == NULL) { return(NO); }
  buffer->hash   = cacheSlot->setToHash;
  buffer->length = cacheSlot->setToLength;

  cacheSlot->setToUniChar = buffer->uniChar;
  cacheSlot->setToRange   = NSNotFoundRange; // Forces the uregex_setText() call below.

setRegexText:
  if(NSEqualRanges(cacheSlot->setToRange, *range) == NO) {
    RKLCDelayedAssert((cacheSlot->icu_regex != NULL) && (cacheSlot->setToUniChar != NULL) && (NSMaxRange(*range) <= (NSUInteger)cacheSlot->setToLength), exception, exitNow);
    cacheSlot->lastFindRange = cacheSlot->lastMatchRange = NSNotFoundRange; // The text is changing, so any cached find results are stale.
    cacheSlot->setToRange = *range;
    uregex_setText(cacheSlot->icu_regex, cacheSlot->setToUniChar + cacheSlot->setToRange.location, (int32_t)cacheSlot->setToRange.length, status);
    if(*status > 0) { return(NO); }
  }

  return(YES);

exitNow:
  return(NO);
}

// IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
// IMPORTANT! Should only be called with cacheSpinLock already locked!
// ----------
//
// Returns the cache slot for regexString / options with its ICU regex set to search matchRange of matchString.
//
//  matchLengthPtr  Receives the length of matchString in UTF16 code units.
//  matchRange      In/out. A length of NSUIntegerMax is replaced with the string length.
//  error           Passed through to getCachedRegex().
//  exception       Receives an NSRangeException when matchRange is out of bounds, or a delayed assertion.
//  status          Receives the ICU status of uregex_setText().
//
// Returns NULL when the regex could not be obtained or the regex text could not be set. Callers must also check
// *exception and *status: some failure paths return the slot with *exception set.

static RKLCacheSlot *getCachedRegexSetToString(NSString *regexString, RKLRegexOptions options, NSString *matchString, NSUInteger *matchLengthPtr, NSRange *matchRange, NSError **error, id *exception, int32_t *status) {
  RKLCacheSlot *cacheSlot = NULL;
  RKLCDelayedAssert((regexString != NULL) && (matchString != NULL) && (matchLengthPtr != NULL) && (matchRange != NULL) && (exception != NULL) && (status != NULL), exception, exitNow);

  // Fast path the common case where this regex is exactly the same one used last time.
  // Like the fast path in getCachedRegex(), only a slot that still owns a compiled regex qualifies.
  if((lastCacheSlot != NULL) && (lastCacheSlot->icu_regex != NULL) && (lastCacheSlot->regexString == (CFStringRef)regexString) && (lastCacheSlot->options == options)) { cacheSlot = lastCacheSlot; }
  else { if((cacheSlot = getCachedRegex(regexString, options, error, exception)) == NULL) { goto exitNow; } }

  // Optimize the case where the string to search (matchString) is immutable and the setToString immutable copy is the same string with its reference count incremented.
  BOOL isSetTo = ((cacheSlot->setToString != NULL) && (cacheSlot->setToString == (CFStringRef)matchString)) ? YES : NO;
  CFIndex matchLength = ((isSetTo == YES) && (cacheSlot->setToIsImmutable == YES)) ? cacheSlot->setToLength : CFStringGetLength((CFStringRef)matchString);

  *matchLengthPtr = (NSUInteger)matchLength;
  if(matchRange->length == NSUIntegerMax) { matchRange->length = matchLength; } // For convenience, allow NSUIntegerMax == string length.

  if((NSUInteger)matchLength < NSMaxRange(*matchRange)) { *exception = [NSException exceptionWithName:NSRangeException reason:@"Range or index out of bounds" userInfo:NULL]; goto exitNow; }

  // A mutable setToString whose length or hash no longer matches what was recorded has been changed behind our back.
  if((cacheSlot->setToIsImmutable == NO) && (cacheSlot->setToString != NULL) && ((cacheSlot->setToLength != CFStringGetLength(cacheSlot->setToString)) || (cacheSlot->setToHash != CFHash(cacheSlot->setToString)))) { isSetTo = NO; }
  else { // If the first pointer equality check failed, check the hash and length.
    if(((isSetTo == NO) || (cacheSlot->setToIsImmutable == NO)) && (cacheSlot->setToString != NULL)) { isSetTo = ((cacheSlot->setToLength == matchLength) && (cacheSlot->setToHash == CFHash((CFStringRef)(matchString)))); }

    if((isSetTo == YES)) { // Make sure that the UTF16 conversion cache is set to this string, if conversion is required.
      if((cacheSlot->setToNeedsConversion == YES) && (setCacheSlotToString(cacheSlot, matchRange, status, exception) == NO)) {
        if(*exception == NULL) { *exception = RKLCAssert(@"Failed to set up UTF16 buffer."); } // Keep a more specific failure if one was already recorded.
        goto exitNow;
      }
      if(NSEqualRanges(cacheSlot->setToRange, *matchRange) == YES) { goto exitNow; } // Verify that the range to search is what the cached regex was prepped for last time.
    }
  }

  // Sometimes the range that the regex is set to isn't right, in which case we don't want to clear the cache slot. Otherwise, flush it out.
  if((cacheSlot->setToString != NULL) && (isSetTo == NO)) { clearCacheSlotSetTo(cacheSlot); }

  if(cacheSlot->setToString == NULL) {
    cacheSlot->setToString = CFRetain(matchString);
    RKLCDelayedAssert(cacheSlot->setToString != NULL, exception, exitNow);
    cacheSlot->setToUniChar         = CFStringGetCharactersPtr(cacheSlot->setToString);
    cacheSlot->setToNeedsConversion = (cacheSlot->setToUniChar == NULL) ? YES : NO;
    cacheSlot->setToIsImmutable     = !rkl_CFStringIsMutable(cacheSlot->setToString); // If RKL_FAST_MUTABLE_CHECK is not defined then the result is '0', or in other words mutable..
    cacheSlot->setToHash            = CFHash(cacheSlot->setToString);
    cacheSlot->setToRange           = NSNotFoundRange;
    cacheSlot->setToLength          = matchLength;
  }
  if(setCacheSlotToString(cacheSlot, matchRange, status, exception) == NO) { cacheSlot = NULL; goto exitNow; }

exitNow:
  return(cacheSlot);
}

// IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
// ----------
//
// The common entry point for the range, split and replace operations.
//
//  self, _cmd         The receiver and selector of the calling method. Only used to build exception reasons.
//  doRegexOp          The operation code, optionally OR'd with RKLReplaceMutable.
//  capture            The capture group, used by RKLRangeOp.
//  matchString        The string to search. For RKLReplaceMutable it is sent -replaceCharactersInRange:withString:.
//  matchRange         The range of matchString to operate on. A length of NSUIntegerMax means "to the string length".
//  replacementString  Required for RKLReplaceOp.
//  error              Optional. Reset to NULL on entry, set when the regex fails to compile.
//  result             RKLRangeOp: points at the NSRange that receives the match.
//                     RKLReplaceOp: points at the NSUInteger that receives the replaced count.
//
// Takes cacheSpinLock for the duration of the operation. All exceptions are raised after the lock is released.
// Returns the array or string produced by the split and replace operations, otherwise NULL.

static id performRegexOp(id self, SEL _cmd, RKLRegexOp doRegexOp, NSString *regexString, RKLRegexOptions options, NSInteger capture, id matchString, NSRange *matchRange, NSString *replacementString, NSError **error, void **result) {
  BOOL replaceMutable = ((doRegexOp & RKLReplaceMutable) != 0) ? YES : NO;
  RKLRegexOp regexOp = (doRegexOp & RKLMaskOp);

  if((error != NULL) && (*error != NULL)) { *error = NULL; }

  if(regexString == NULL) { RKLRaiseException(NSInvalidArgumentException, @"The regular expression argument is NULL."); }
  if(matchString == NULL) { RKLRaiseException(NSInternalInconsistencyException, @"The match string argument is NULL."); }
  if((regexOp == RKLReplaceOp) && (replacementString == NULL)) { RKLRaiseException(NSInvalidArgumentException, @"The replacement string argument is NULL."); }

  NSUInteger stringU16Length = 0UL, replacementU16Length = (NSUInteger)((replacementString != NULL) ? CFStringGetLength((CFStringRef)replacementString) : 0); // In UTF16 code units.
  NSRange stringRange = NSMakeRange(0, NSUIntegerMax), searchRange = (matchRange != NULL) ? *matchRange : NSNotFoundRange;
  RKLCacheSlot *cacheSlot = NULL;
  id exception = NULL;
  id resultObject = NULL;
  int32_t status = 0;

  // IMPORTANT! Once we have obtained the lock, code MUST exit via 'goto exitNow;' to unlock the lock! NO EXCEPTIONS!
  // ----------
  OSSpinLockLock(&cacheSpinLock); // Grab the lock and get cache entry.
  // A range operation sets the regex to the whole string (stringRange) and searches within it. The other operations set the regex to searchRange itself.
  if(((cacheSlot = getCachedRegexSetToString(regexString, options, matchString, &stringU16Length, (regexOp == RKLRangeOp) ? &stringRange : &searchRange, error, &exception, &status)) == NULL) || (exception != NULL) || (status > 0)) { goto exitNow; }

  if(searchRange.length == NSUIntegerMax) { searchRange.length = stringU16Length; } // For convenience.
  if(stringU16Length < NSMaxRange(searchRange)) { exception = [NSException exceptionWithName:NSRangeException reason:@"Range or index out of bounds" userInfo:NULL]; goto exitNow; }

  RKLCDelayedAssert((cacheSlot->icu_regex != NULL) && (exception == NULL), &exception, exitNow);
  if(cacheSlot->setToNeedsConversion != 0) { // Sanity check that the conversion buffer really holds the string the slot is set to.
    RKLBuffer *buffer = (cacheSlot->setToLength < RKL_FIXED_LENGTH) ? &fixedBuffer : &dynamicBuffer;
    RKLCDelayedAssert((cacheSlot->setToHash == buffer->hash) && (cacheSlot->setToLength == buffer->length) && (cacheSlot->setToUniChar == buffer->uniChar), &exception, exitNow);
  }

  switch(regexOp) {
    case RKLRangeOp:   rkl_find(cacheSlot, capture, searchRange, (NSRange *)result, &exception, &status); break;
    case RKLSplitOp:   resultObject = rkl_splitArray(cacheSlot, &exception, &status); break;
    case RKLReplaceOp: resultObject = rkl_replaceString(cacheSlot, matchString, stringU16Length, replacementString, replacementU16Length, (NSUInteger *)result, replaceMutable, &exception, &status); break;
    default:           exception = RKLCAssert(@"Unknown regexOp code."); break;
  }

exitNow:
  OSSpinLockUnlock(&cacheSpinLock);

  if((status > 0) && (exception == NULL)) { exception = RKLNSExceptionForRegex(regexString, options, NULL, status); } // If we had a problem, throw an exception.
  if(exception != NULL) {
    // The reason is passed as an argument to "%@" and never as the format string itself: it can contain arbitrary text, including '%'.
    if([exception isKindOfClass:[NSException class]]) { [[NSException exceptionWithName:[exception name] reason:RKLStringFromClassAndMethod(self, _cmd, @"%@", [exception reason]) userInfo:[exception userInfo]] raise]; }
    // Anything else is the dictionary produced by RKLCAssert() for a delayed assertion.
    else { [[NSAssertionHandler currentHandler] handleFailureInFunction:[exception objectForKey:@"function"] file:[exception objectForKey:@"file"] lineNumber:[[exception objectForKey:@"line"] longValue] description:[exception objectForKey:@"description"]]; }
  }

  if(replaceMutable == YES) { // We're working on a mutable string and if there were successful matches with replaced text we still have work to do. Done outside the cache lock.
    if(*((NSUInteger *)result) > 0) { NSCParameterAssert(resultObject != NULL); [matchString replaceCharactersInRange:searchRange withString:resultObject]; }
  }

  return(resultObject);
}

// IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
// IMPORTANT! Should only be called from performRegexOp().
// ----------

// Finds the range of `capture` for the regex in `cacheSlot`, searching from `searchRange`, and stores it in `*resultRange`.
//
// cacheSlot    The cache slot holding the compiled ICU regex and the cached find state (lastFindRange / lastMatchRange).
// capture      The capture group to return; 0 is the entire match. Must be in 0 ... cacheSlot->captureCount.
// searchRange  The range of the target text to search.
// resultRange  Receives the range of the capture, or NSNotFoundRange if there was no match or something went wrong.
// exception    Receives an NSException (invalid capture) or an assertion dictionary (failed RKLCDelayedAssert).
// status       Receives the ICU status code; callers treat values > 0 as errors.
//
// The actual ICU find is skipped when `searchRange` equals the slot's `lastFindRange`; in that case the cached
// `lastMatchRange` is reused and only the capture range is looked up.
static void rkl_find(RKLCacheSlot *cacheSlot, NSInteger capture, NSRange searchRange, NSRange *resultRange, id *exception, int32_t *status) {
  NSRange captureRange = NSNotFoundRange;

  RKLCDelayedAssert((cacheSlot != NULL) && (resultRange != NULL) && (exception != NULL) && (status != NULL), exception, exitNow);

  if((capture < 0) || (capture > cacheSlot->captureCount)) { *exception = [NSException exceptionWithName:NSInvalidArgumentException reason:@"The capture argument is not valid." userInfo:NULL]; goto exitNow; }

  if((NSEqualRanges(searchRange, cacheSlot->lastFindRange) == NO)) { // Only perform an expensive 'find' operation iff the current find range is different than the last find range.
    RKL_PREFETCH(cacheSlot->setToUniChar, searchRange.location << 1); // Spool up the CPU caches.

    // Using uregex_findNext can be a slight performance win.
    // It is used when the search starts exactly where the previous match ended (plus one for a zero length match).
    BOOL useFindNext = (searchRange.location == (NSMaxRange(cacheSlot->lastMatchRange) + ((cacheSlot->lastMatchRange.length == 0) ? 1 : 0))) ? YES : NO;

    cacheSlot->lastFindRange = NSNotFoundRange; // Cleared the cached search/find range.
    if(useFindNext == NO) { if((uregex_find    (cacheSlot->icu_regex, (int32_t)searchRange.location, status) == NO) || (*status > 0)) { goto exitNow; } }
    else {                  if((uregex_findNext(cacheSlot->icu_regex,                                status) == NO) || (*status > 0)) { goto exitNow; } }

    if(RKLGetRangeForCapture(cacheSlot->icu_regex, status, 0, cacheSlot->lastMatchRange) != 0) { goto exitNow; }
    if(NSRangeInsideRange(cacheSlot->lastMatchRange, searchRange) == NO) { goto exitNow; } // If the regex matched outside the requested range, exit.

    cacheSlot->lastFindRange = searchRange; // Cache the successful search/find range.
  }

  if(capture == 0) { captureRange = cacheSlot->lastMatchRange; } else { RKLGetRangeForCapture(cacheSlot->icu_regex, status, capture, captureRange); }

 exitNow:
  // The entry assertion jumps here when resultRange is NULL, so the store must be guarded or the
  // assertion failure would turn into a NULL dereference before it could ever be reported.
  if(resultRange != NULL) { *resultRange = captureRange; }
}

// IMPORTANT!   This code is critical path code.  Because of this, it has been written for speed, not clarity.
// IMPORTANT!
// Should only be called from performRegexOp().
// ----------

// Splits the text that `cacheSlot` is set to at every match of the slot's regex and returns the pieces as an array.
//
// cacheSlot  The cache slot holding the compiled ICU regex and the text/range it has been set to.
// exception  Receives an assertion dictionary if one of the RKLCDelayedAssert checks fails.
// status     Receives the ICU status code from the uregex_* calls.
//
// Returns an array of strings: for each match, the text between the previous match and this one, followed by one
// entry per capture group (a capture that did not participate becomes an empty range), and finally the text after
// the last match. Returns an empty array when the slot's text length is 0, and NULL if a buffer could not be
// obtained, an assertion failed, or retrieving a match range reported an error.
//
// The function works in two passes: first it collects NSRanges (relative to the start of the slot's range) into
// `splitRanges`, then it turns every range into a string. Both working buffers are taken from the stack with
// alloca() while the running total stays below RKL_STACK_LIMIT, otherwise from scratchBuffer[0] / scratchBuffer[1]
// via rkl_realloc(). Any scratch buffers are freed on the way out.
static NSArray *rkl_splitArray(RKLCacheSlot *cacheSlot, id *exception, int32_t *status) {
  NSArray *resultArray = NULL;

  RKLCDelayedAssert((cacheSlot != NULL) && (status != NULL), exception, exitNow);

  const char *setToUniCharChar = (const char *)(cacheSlot->setToUniChar + cacheSlot->setToRange.location);
  // Initial capacity: a quarter of the stack limit worth of NSRanges, plus head room for one match's captures.
  NSUInteger splitRangesCapacity = ((((RKL_STACK_LIMIT / sizeof(NSRange)) / 4) + ((cacheSlot->captureCount + 1) * 2)) + 2), splitRangesIndex = 0, lastLocation = 0, x = 0;
  size_t splitRangesSize = (splitRangesCapacity * sizeof(NSRange)), stackUsed = 0;
  NSInteger captureCount = cacheSlot->captureCount;
  uregex *icu_regex = cacheSlot->icu_regex;
  NSRange *splitRanges = NULL;
  BOOL copiedStackToHeap = NO;

  if(cacheSlot->setToLength == 0) { resultArray = [NSArray array]; goto exitNow; } // Return an empty array when there is nothing to search.

  if(splitRangesSize < RKL_STACK_LIMIT) { if((splitRanges = alloca(splitRangesSize)) == NULL) { goto exitNow; } stackUsed += splitRangesSize; }
  else { if((splitRanges = rkl_realloc(&scratchBuffer[0], splitRangesSize, 0UL)) == NULL) { goto exitNow; } }

  cacheSlot->lastFindRange = cacheSlot->lastMatchRange = NSNotFoundRange; // Clear the cached find information for this regex so a subsequent find works correctly.
  uregex_reset(icu_regex, 0, status); // Reset the regex to the start of the string.

  // Each iteration handles one match: it writes the range preceding the match, then one range per capture group.
  // The loop normally ends via the 'break' below when no further match is found.
  for(splitRangesIndex = 0; splitRangesIndex < splitRangesCapacity; splitRangesIndex++) {
    if(splitRangesIndex >= ((splitRangesCapacity - ((captureCount + 1) * 2)) - 1)) { // Check if we need to grow our NSRanges buffer.
      NSUInteger newCapacity = (((splitRangesCapacity + (splitRangesCapacity / 2)) + ((captureCount + 1) * 2)) + 2);
      size_t newSize = (newCapacity * sizeof(NSRange));
      NSRange *newRanges = NULL;
      if((newRanges = rkl_realloc(&scratchBuffer[0], newSize, 0UL)) == NULL) { goto exitNow; }
      // We only try to use the stack the first time, after that, we use the heap.
      // The ranges gathered in the alloca() buffer are copied over exactly once.
      // NOTE(review): later growths rely on rkl_realloc() preserving the existing contents of scratchBuffer[0] -- confirm.
      if((stackUsed > 0) && (copiedStackToHeap == NO)) { memcpy(newRanges, splitRanges, splitRangesSize); copiedStackToHeap = YES; }
      splitRangesCapacity = newCapacity;
      splitRangesSize = newSize;
      splitRanges = newRanges;
    }

    RKL_PREFETCH(setToUniCharChar, lastLocation << 1); // Spool up the CPU caches.

    NSUInteger baseMatchIndex = splitRangesIndex; // Index of the "text before this match" entry.
    NSRange tempRange;

    if((uregex_findNext(icu_regex, status) == NO) || (*status > 0)) { break; }
    if(RKLGetRangeForCapture(icu_regex, status, 0, tempRange) > 0) { goto exitNow; }

    splitRanges[splitRangesIndex] = NSMakeRange(lastLocation, tempRange.location - lastLocation); // Text between the previous match and this one.
    lastLocation = NSMaxRange(tempRange);

    int32_t capture;
    for(capture = 1; capture <= captureCount; capture++) {
      RKLCDelayedAssert(splitRangesIndex < (splitRangesCapacity - 2), exception, exitNow);
      splitRangesIndex++;

      if(RKLGetRangeForCapture(icu_regex, status, capture, splitRanges[splitRangesIndex]) > 0) { goto exitNow; }
      // A capture group with no range is recorded as an empty range at the location of the base entry.
      if(splitRanges[splitRangesIndex].location == NSNotFound) { splitRanges[splitRangesIndex] = NSMakeRange(splitRanges[baseMatchIndex].location, 0); }
    }
  }

  // Add the text following the last match (or the entire range if there were no matches).
  RKLCDelayedAssert(splitRangesIndex < (splitRangesCapacity - 2), exception, exitNow);
  splitRanges[splitRangesIndex] = NSMakeRange(lastLocation, (NSMaxRange(cacheSlot->setToRange) - cacheSlot->setToRange.location) - lastLocation);
  splitRangesIndex++; // From here on splitRangesIndex is the number of ranges collected.

  CFIndex setToLocation = cacheSlot->setToRange.location;
  CFStringRef setToString = cacheSlot->setToString;
  size_t splitStringsSize = (splitRangesIndex * sizeof(id));
  id *splitStrings = NULL;

  if((stackUsed + splitStringsSize) < RKL_STACK_LIMIT) { if((splitStrings = alloca(splitStringsSize)) == NULL) { goto exitNow; } stackUsed += splitStringsSize; }
#ifdef __OBJC_GC__
  else { if((splitStrings = rkl_realloc(&scratchBuffer[1], splitStringsSize, (NSUInteger)NSScannedOption)) == NULL) { goto exitNow; } }
#else
  // http://sourceforge.net/tracker/index.php?func=detail&aid=2050825&group_id=204582&atid=990188
  // This is to get around an iPhone quirk.  For whatever reason, the iPhone NSZone.h explicitly removes all NSAllocateCollectable()
  // bits and pieces using #if pre-processor conditions.  Since NSScannedOption is only really used when the compiler has -fobjc-gc enabled,
  // we just chop it out here.
  else { if((splitStrings = rkl_realloc(&scratchBuffer[1], splitStringsSize, 0)) == NULL) { goto exitNow; } }
#endif

#ifdef __OBJC_GC__
  if(rkl_collectingEnabled() == YES) { // I just don't trust the GC system with the faster CF way of doing things...  It never seems to work quite the way you expect it to.
    for(x = 0; x < splitRangesIndex; x++) { // Optimize the case where the length == 0 by substituting the string @"".
      splitStrings[x] = (splitRanges[x].length == 0) ? @"" : [(id)setToString substringWithRange:NSMakeRange(setToLocation + splitRanges[x].location, splitRanges[x].length)];
    }
    resultArray = [NSArray arrayWithObjects:splitStrings count:splitRangesIndex];
  } else
#endif
  { // This block of code is always compiled in.  It is used when not compiled with GC or when compiled with GC but the collector is not enabled.
    for(x = 0; x < splitRangesIndex; x++) { // Optimize the case where the length == 0 by substituting the string @"".
      splitStrings[x] = (splitRanges[x].length == 0) ? @"" : (id)CFStringCreateWithSubstring(NULL, setToString, CFMakeRange(setToLocation + splitRanges[x].location, (CFIndex)splitRanges[x].length));
    }
    // NOTE(review): presumably transferOwnershipArrayCallBacks makes the array take over the +1 reference of each created substring -- confirm against its definition.
    resultArray = rkl_CFAutorelease(CFArrayCreate(NULL, (const void **)splitStrings, (CFIndex)splitRangesIndex, &transferOwnershipArrayCallBacks)); // Create the CF/NSArray of the split strings.
  }

 exitNow:
  // Release any heap scratch space that was used; alloca() memory goes away with the stack frame.
  if(scratchBuffer[0] != NULL) { free(scratchBuffer[0]); scratchBuffer[0] = NULL; }
  if(scratchBuffer[1] != NULL) { free(scratchBuffer[1]); scratchBuffer[1] = NULL; }

  return(resultArray);
}

// IMPORTANT!   This code is critical path code.  Because of this, it has been written for speed, not clarity.
// IMPORTANT!   Should only be called from performRegexOp().
// ----------

// Replaces every match of the regex in `cacheSlot` with `replacementString` and returns the resulting string.
//
// cacheSlot             The cache slot holding the compiled ICU regex and the text it has been set to.
// searchString          The string being searched; only used to create a copy when nothing was replaced.
// searchU16Length       Length of the searched string in UTF16 code units.
// replacementString     The replacement template handed to ICU.
// replacementU16Length  Length of the replacement string in UTF16 code units.
// replacedCountPtr      If not NULL, receives the number of replacements made (written on every exit path).
// replaceMutable        YES when the caller will mutate `searchString` itself with the returned string.
// exception             Passed through to rkl_replaceAll() for delayed assertions.
// status                Receives the ICU status code; a value > 0 on return means the replace failed.
//
// Returns the replaced string, @"" when the result is empty, or NULL on failure. NULL is also returned when
// `replaceMutable` is YES, nothing was replaced and the result length equals `searchU16Length`.
//
// The result buffer is first sized with a guess. If ICU reports U_BUFFER_OVERFLOW_ERROR the buffer is resized to the
// length returned by the first rkl_replaceAll() call (plus a little slack) and the replace is run a second time.
static NSString *rkl_replaceString(RKLCacheSlot *cacheSlot, id searchString, NSUInteger searchU16Length, NSString *replacementString, NSUInteger replacementU16Length, NSUInteger *replacedCountPtr, int replaceMutable, id *exception, int32_t *status) {
  int32_t resultU16Length = 0, tempUniCharBufferU16Capacity = 0;
  UniChar *tempUniCharBuffer = NULL;
  const UniChar *replacementUniChar = NULL;
  id resultObject = NULL;
  NSUInteger replacedCount = 0;

  // Zero order approximation of the buffer sizes for holding the replaced string or split strings and split strings pointer offsets.  As UTF16 code units.
  tempUniCharBufferU16Capacity = (int32_t)(16 + (searchU16Length + (searchU16Length >> 1)) + (replacementU16Length * 2));

  // Buffer sizes converted from native units to bytes.
  size_t stackSize = 0, replacementSize = (replacementU16Length * sizeof(UniChar)), tempUniCharBufferSize = (tempUniCharBufferU16Capacity * sizeof(UniChar));

  // For the various buffers we require, we first try to allocate from the stack if we're not over the RKL_STACK_LIMIT.  If we are, switch to using the heap for the buffer.
  if((stackSize + tempUniCharBufferSize) < RKL_STACK_LIMIT) { if((tempUniCharBuffer = alloca(tempUniCharBufferSize)) == NULL) { goto exitNow; } stackSize += tempUniCharBufferSize; }
  else { if((tempUniCharBuffer = rkl_realloc(&scratchBuffer[0], tempUniCharBufferSize, 0UL)) == NULL) { goto exitNow; } }

  // Try to get the pointer to the replacement strings UTF16 data.  If we can't, allocate some buffer space, then convert to UTF16.
  if((replacementUniChar = CFStringGetCharactersPtr((CFStringRef)replacementString)) == NULL) {
    if((stackSize + replacementSize) < RKL_STACK_LIMIT) { if((replacementUniChar = alloca(replacementSize)) == NULL) { goto exitNow; } stackSize += replacementSize; }
    else { if((replacementUniChar = rkl_realloc(&scratchBuffer[1], replacementSize, 0UL)) == NULL) { goto exitNow; } }
    CFStringGetCharacters((CFStringRef)replacementString, CFMakeRange(0, replacementU16Length), (UniChar *)replacementUniChar); // Convert to a UTF16 string.
  }

  cacheSlot->lastFindRange = cacheSlot->lastMatchRange = NSNotFoundRange; // Clear the cached find information for this regex so a subsequent find works correctly.

  resultU16Length = rkl_replaceAll(cacheSlot, replacementUniChar, (int32_t)replacementU16Length, tempUniCharBuffer, tempUniCharBufferU16Capacity, &replacedCount, exception, status);

  if(*status == U_BUFFER_OVERFLOW_ERROR) { // Our buffer guess(es) were too small.  Resize the buffers and try again.
    // The retry trusts resultU16Length from the first pass to be the full length required for the replaced text.
    tempUniCharBufferSize = ((tempUniCharBufferU16Capacity = resultU16Length + 4) * sizeof(UniChar));
    if((stackSize + tempUniCharBufferSize) < RKL_STACK_LIMIT) { if((tempUniCharBuffer = alloca(tempUniCharBufferSize)) == NULL) { goto exitNow; } stackSize += tempUniCharBufferSize; }
    else { if((tempUniCharBuffer = rkl_realloc(&scratchBuffer[0], tempUniCharBufferSize, 0UL)) == NULL) { goto exitNow; } }

    *status = 0; // Make sure the status var is cleared and try again.
    resultU16Length = rkl_replaceAll(cacheSlot, replacementUniChar, (int32_t)replacementU16Length, tempUniCharBuffer, tempUniCharBufferU16Capacity, &replacedCount, exception, status);
  }

  if(*status > 0) { goto exitNow; } // Something went wrong.

  if(resultU16Length == 0) { resultObject = @""; } // Optimize the case where the replaced text length == 0 with a @"" string.
  else if(((NSUInteger)resultU16Length == searchU16Length) && (replacedCount == 0)) { // Optimize the case where the replacement == original by creating a copy.  Very fast if self is immutable.
    if(replaceMutable == NO) { resultObject = rkl_CFAutorelease(CFStringCreateCopy(NULL, (CFStringRef)searchString)); } // .. but only if this is not replacing a mutable self.
  } else { resultObject = rkl_CFAutorelease(CFStringCreateWithCharacters(NULL, tempUniCharBuffer, (CFIndex)resultU16Length)); } // otherwise, create a new string.

  // If replaceMutable == YES, we don't do the replacement here.  We wait until after we return and unlock the cache lock.
  // This is because we may be trying to mutate an immutable string object.
  if((replacedCount > 0) && (replaceMutable == YES)) { // We're working on a mutable string and there were successful matches with replaced text, so there's work to do.
    clearBuffer((cacheSlot->setToLength < RKL_FIXED_LENGTH) ? &fixedBuffer : &dynamicBuffer, NO);
    clearCacheSlotSetTo(cacheSlot); // Flush any cached information about this string since it will mutate.
  }

 exitNow:
  // Release any heap scratch space that was used; alloca() memory goes away with the stack frame.
  if(scratchBuffer[0] != NULL) { free(scratchBuffer[0]); scratchBuffer[0] = NULL; }
  if(scratchBuffer[1] != NULL) { free(scratchBuffer[1]); scratchBuffer[1] = NULL; }
  if(replacedCountPtr != NULL) { *replacedCountPtr = replacedCount; }
  return(resultObject);
}

// Modified version of the ICU libraries uregex_replaceAll() that keeps count of the number of replacements made.
// Replaces every match of the regex in `cacheSlot` with the replacement text, writing the result to `replacedUniChar`.
//
// cacheSlot             The cache slot holding the compiled ICU regex and the text it has been set to.
// replacementUniChar    The UTF16 replacement template.
// replacementU16Length  Length of the replacement template in UTF16 code units.
// replacedUniChar       Destination buffer for the replaced text.
// replacedU16Capacity   Capacity of the destination buffer in UTF16 code units.
// replacedCount         If not NULL, receives the number of replacements made.
// exception             Receives an assertion dictionary if the argument assertion fails.
// status                Receives the ICU status. U_BUFFER_OVERFLOW_ERROR means the destination was too small.
//
// Returns the total length, in UTF16 code units, of the replaced text. When the destination buffer is too small the
// returned length is still the full length required, so the caller can resize its buffer and try again.
static int32_t rkl_replaceAll(RKLCacheSlot *cacheSlot, const UniChar *replacementUniChar, int32_t replacementU16Length, UniChar *replacedUniChar, int32_t replacedU16Capacity, NSUInteger *replacedCount, id *exception, int32_t *status) {
  NSUInteger replaced = 0;
  int32_t u16Length = 0;
  BOOL bufferOverflowed = NO;
  RKLCDelayedAssert((cacheSlot != NULL) && (replacementUniChar != NULL) && (replacedUniChar != NULL) && (status != NULL), exception, exitNow);

  uregex_reset(cacheSlot->icu_regex, 0, status);

  // Work around for ICU uregex_reset() bug, see http://bugs.icu-project.org/trac/ticket/6545
  // http://sourceforge.net/tracker/index.php?func=detail&aid=2105213&group_id=204582&atid=990188
  if((cacheSlot->setToLength == 0) && (*status == 8)) { *status = 0; }

  while(uregex_findNext(cacheSlot->icu_regex, status)) {
    replaced++;
    u16Length += uregex_appendReplacement(cacheSlot->icu_regex, replacementUniChar, replacementU16Length, &replacedUniChar, &replacedU16Capacity, status);
    // uregex_findNext() refuses to search once status holds an error.  If the overflow was left in status the loop would
    // end after the first replacement that did not fit, and the returned length would be too small for the callers retry.
    // Remember the overflow and clear it so the remaining matches are still counted and measured.
    if(*status == U_BUFFER_OVERFLOW_ERROR) { bufferOverflowed = YES; *status = 0; }
  }
  u16Length += uregex_appendTail(cacheSlot->icu_regex, &replacedUniChar, &replacedU16Capacity, status);

  // Report the overflow that was hidden from uregex_findNext(), unless a real error has occurred since then.
  if((bufferOverflowed != NO) && (*status <= 0)) { *status = U_BUFFER_OVERFLOW_ERROR; }

  if(replacedCount != NULL) { *replacedCount = replaced; }
 exitNow:
  return(u16Length);
}

// Frees all of the scratch buffers, clears every slot in the regex cache and releases the fixed and dynamic string buffers.
// Asserts that cacheSpinLock is non-zero, i.e. it expects to be called with the cache lock held.
static void rkl_clearStringCache(void) {
  NSCParameterAssert(cacheSpinLock != 0);
  lastCacheSlot = NULL;
  NSUInteger x = 0;
  for(x = 0; x < SCRATCH_BUFFERS; x++) { if(scratchBuffer[x] != NULL) { free(scratchBuffer[x]); scratchBuffer[x] = NULL; } }
  for(x = 0; x < RKL_CACHE_SIZE; x++) { clearCacheSlotRegex(&RKLCache[x]); clearCacheSlotSetTo(&RKLCache[x]); }
  clearBuffer(&fixedBuffer, NO);
  clearBuffer(&dynamicBuffer, YES);
}

// Releases the string held by `buffer` and zeros its length and hash.
// The UniChar storage is only freed when `freeDynamicBuffer` is YES and `buffer` is the dynamicBuffer.
static void clearBuffer(RKLBuffer *buffer, int freeDynamicBuffer) {
  if(buffer == NULL) { return; }
  if((freeDynamicBuffer == YES) && (buffer->uniChar != NULL) && (buffer == &dynamicBuffer)) { free(dynamicBuffer.uniChar); dynamicBuffer.uniChar = NULL; }
  if(buffer->string != NULL) { CFRelease(buffer->string); buffer->string = NULL; }
  buffer->length = 0L;
  buffer->hash = 0UL;
}

// Releases the regex string and closes the compiled ICU regex of `cacheSlot`, then clears any "set to" string state.
static void clearCacheSlotRegex(RKLCacheSlot *cacheSlot) {
  if(cacheSlot == NULL) { return; }
  if(cacheSlot->regexString != NULL) { CFRelease(cacheSlot->regexString); cacheSlot->regexString = NULL; cacheSlot->options = 0U; }
  if(cacheSlot->icu_regex != NULL) { uregex_close(cacheSlot->icu_regex); cacheSlot->icu_regex = NULL; cacheSlot->captureCount = -1L; }
  if(cacheSlot->setToString != NULL) { clearCacheSlotSetTo(cacheSlot); }
}

// Forgets the string that the regex in `cacheSlot` is currently set to.
// The ICU regex, if any, is pointed at an empty string so it no longer refers to the old text.
static void clearCacheSlotSetTo(RKLCacheSlot *cacheSlot) {
  if(cacheSlot == NULL) { return; }
  if(cacheSlot->icu_regex != NULL) { int32_t status = 0; uregex_setText(cacheSlot->icu_regex, &emptyUniCharString[0], 0, &status); }
  if(cacheSlot->setToString != NULL) { CFRelease(cacheSlot->setToString); cacheSlot->setToString = NULL; }
  cacheSlot->setToLength = 0L;
  cacheSlot->setToHash = 0UL;
  cacheSlot->setToIsImmutable = cacheSlot->setToNeedsConversion = 0UL;
  cacheSlot->lastFindRange = cacheSlot->lastMatchRange = cacheSlot->setToRange = NSNotFoundRange;
  cacheSlot->setToUniChar = NULL;
}

// Helps to keep things tidy.
// Appends the pair (k, o) to the parallel arrays keys/objs at index i and increments i, but only if both are non-NULL.
#define addKeyAndObject(objs, keys, i, k, o) ({id _o=(o), _k=(k); if((_o != NULL) && (_k != NULL)) { objs[i] = _o; keys[i] = _k; i++; } })

// Builds the userInfo dictionary describing an ICU regex error.
//
// regexString  The regular expression that caused the error.  If NULL, NULL is returned.
// options      The regex options in use.
// parseError   Optional ICU parse error details; used when not NULL and its line is not -1.
// status       The ICU status code.
// ...          Additional (object, key) pairs, terminated by a single NULL.
static NSDictionary *userInfoDictionary(NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status, ...) {
  if(regexString == NULL) { return(NULL); } // Checked before va_start() so that no va_list is left open.

  va_list varArgsList;
  va_start(varArgsList, status);

  id objects[64], keys[64];
  NSUInteger count = 0;

  NSString *errorNameString = [NSString stringWithUTF8String:u_errorName(status)];

  addKeyAndObject(objects, keys, count, RKLICURegexRegexErrorKey,        regexString);
  addKeyAndObject(objects, keys, count, RKLICURegexRegexOptionsErrorKey, [NSNumber numberWithUnsignedInt:options]);
  addKeyAndObject(objects, keys, count, RKLICURegexErrorCodeErrorKey,    [NSNumber numberWithInt:status]);
  addKeyAndObject(objects, keys, count, RKLICURegexErrorNameErrorKey,    errorNameString);

  if((parseError != NULL) && (parseError->line != -1)) {
    NSString *preContextString  = [NSString stringWithCharacters:&parseError->preContext[0]  length:(NSUInteger)u_strlen(&parseError->preContext[0])];
    NSString *postContextString = [NSString stringWithCharacters:&parseError->postContext[0] length:(NSUInteger)u_strlen(&parseError->postContext[0])];

    addKeyAndObject(objects, keys, count, RKLICURegexLineErrorKey,        [NSNumber numberWithInt:parseError->line]);
    addKeyAndObject(objects, keys, count, RKLICURegexOffsetErrorKey,      [NSNumber numberWithInt:parseError->offset]);
    addKeyAndObject(objects, keys, count, RKLICURegexPreContextErrorKey,  preContextString);
    addKeyAndObject(objects, keys, count, RKLICURegexPostContextErrorKey, postContextString);
    addKeyAndObject(objects, keys, count, @"NSLocalizedFailureReason",    ([NSString stringWithFormat:@"The error %@ occurred at line %d, column %d: %@<<HERE>>%@", errorNameString, parseError->line, parseError->offset, preContextString, postContextString]));
  } else {
    addKeyAndObject(objects, keys, count, @"NSLocalizedFailureReason",    ([NSString stringWithFormat:@"The error %@ occurred.", errorNameString]));
  }

  // The variable arguments are (object, key) pairs ended by one NULL.  Stop at the first NULL: fetching another
  // argument after the terminator would read past the arguments that were actually passed.
  while(count < 62) {
    id obj = va_arg(varArgsList, id);
    if(obj == NULL) { break; }
    id key = va_arg(varArgsList, id);
    if(key == NULL) { break; }
    addKeyAndObject(objects, keys, count, key, obj);
  }
  va_end(varArgsList);

  return([NSDictionary dictionaryWithObjects:&objects[0] forKeys:&keys[0] count:count]);
}

// Creates the NSError, in the RKLICURegexErrorDomain, that is returned for a regex that failed to compile.
static NSError *RKLNSErrorForRegex(NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status) {
  return([NSError errorWithDomain:RKLICURegexErrorDomain code:(NSInteger)status userInfo:userInfoDictionary(regexString, options, parseError, status, @"There was an error compiling the regular expression.", @"NSLocalizedDescription", NULL)]);
}

// Creates the RKLICURegexException that is raised for an ICU error status.
static NSException *RKLNSExceptionForRegex(NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status) {
  return([NSException exceptionWithName:RKLICURegexException reason:[NSString stringWithFormat:@"ICU regular expression error #%d, %s", status, u_errorName(status)] userInfo:userInfoDictionary(regexString, options, parseError, status, NULL)]);
}

// Packages a failed assertion as a dictionary with the keys "description", "function", "file", "line" and
// "exceptionName" so it can be reported later.  Used by the RKLCAssert() macro.
static NSDictionary *RKLCAssertDictionary(const char *function, const char *file, int line, NSString *format, ...) {
  va_list varArgsList;
  va_start(varArgsList, format);
  NSString *formatString = [[[NSString alloc] initWithFormat:format arguments:varArgsList] autorelease];
  va_end(varArgsList);
  NSString *functionString = [NSString stringWithUTF8String:function], *fileString = [NSString stringWithUTF8String:file];
  return([NSDictionary dictionaryWithObjectsAndKeys:formatString, @"description", functionString, @"function", fileString, @"file", [NSNumber numberWithInt:line], @"line", NSInternalInconsistencyException, @"exceptionName", NULL]);
}

// Formats a message as "*** +[Class selector]: message" when `object` is a class, or "*** -[Class selector]: message" otherwise.
static NSString *RKLStringFromClassAndMethod(id object, SEL selector, NSString *format, ...) {
  va_list varArgsList;
  va_start(varArgsList, format);
  NSString *formatString = [[[NSString alloc] initWithFormat:format arguments:varArgsList] autorelease];
  va_end(varArgsList);
  Class objectsClass = [object class];
  return([NSString stringWithFormat:@"*** %c[%@ %@]: %@", (object == objectsClass) ? '+' : '-', NSStringFromClass(objectsClass), NSStringFromSelector(selector), formatString]);
}

@implementation NSString (RegexKitLiteAdditions)

// Class methods

// Clears the regex and string caches while holding the cache lock.
+ (void)RKL_METHOD_PREPEND(clearStringCache)
{
  OSSpinLockLock(&cacheSpinLock);
  rkl_clearStringCache();
  OSSpinLockUnlock(&cacheSpinLock);
}

// captureCountForRegex:

// Convenience for captureCountForRegex:options:error: with no options and no error reporting.
+ (NSInteger)RKL_METHOD_PREPEND(captureCountForRegex):(NSString *)regex
{
  return([self RKL_METHOD_PREPEND(captureCountForRegex):regex options:RKLNoOptions error:NULL]);
}

// Returns the number of capture groups in `regex`, or -1 if a cached regex could not be obtained.
// Raises NSInvalidArgumentException if `regex` is NULL, and re-raises any exception reported by getCachedRegex()
// once the cache lock has been released.
+ (NSInteger)RKL_METHOD_PREPEND(captureCountForRegex):(NSString *)regex options:(RKLRegexOptions)options error:(NSError **)error
{
  if((error != NULL) && (*error != NULL)) { *error = NULL; }
  if(regex == NULL) { RKLRaiseException(NSInvalidArgumentException, @"The regular expression argument is NULL."); }

  NSException *exception = NULL;
  RKLCacheSlot *cacheSlot = NULL;
  NSInteger captureCount = -1;

  OSSpinLockLock(&cacheSpinLock);
  if((cacheSlot = getCachedRegex(regex, options, error, &exception)) != NULL) { captureCount = cacheSlot->captureCount; }
  OSSpinLockUnlock(&cacheSpinLock);

  if(exception != NULL) { [exception raise]; } // Raised only after the lock has been released.

  return(captureCount);
}

// Instance methods

// componentsSeparatedByRegex:
// All three variants hand the work to performRegexOp() with RKLSplitOp; they differ only in which of
// options, range and error the caller supplies.

- (NSArray *)RKL_METHOD_PREPEND(componentsSeparatedByRegex):(NSString *)regex
{
  NSRange range = NSMaxiumRange;
  return(performRegexOp(self, _cmd, (RKLRegexOp)RKLSplitOp, regex, 0, 0L, self, &range, NULL, NULL, NULL));
}

- (NSArray *)RKL_METHOD_PREPEND(componentsSeparatedByRegex):(NSString *)regex range:(NSRange)range
{
  return(performRegexOp(self, _cmd, (RKLRegexOp)RKLSplitOp, regex, 0, 0L, self, &range, NULL, NULL, NULL));
}

- (NSArray *)RKL_METHOD_PREPEND(componentsSeparatedByRegex):(NSString *)regex options:(RKLRegexOptions)options range:(NSRange)range error:(NSError **)error
{
  return(performRegexOp(self, _cmd, (RKLRegexOp)RKLSplitOp, regex, options, 0L, self, &range, NULL, error, NULL));
}

// isMatchedByRegex:
// Each variant runs an RKLRangeOp and returns YES if the resulting location is not NSNotFound.

- (BOOL)RKL_METHOD_PREPEND(isMatchedByRegex):(NSString *)regex
{
  NSRange result = NSNotFoundRange, range = NSMaxiumRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, 0L, self, &range, NULL, NULL, (void **)((void *)&result));
  return((result.location == NSNotFound) ? NO : YES);
}

- (BOOL)RKL_METHOD_PREPEND(isMatchedByRegex):(NSString *)regex inRange:(NSRange)range
{
  NSRange result = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, 0L, self, &range, NULL, NULL, (void **)((void *)&result));
  return((result.location == NSNotFound) ? NO : YES);
}

- (BOOL)RKL_METHOD_PREPEND(isMatchedByRegex):(NSString *)regex options:(RKLRegexOptions)options inRange:(NSRange)range error:(NSError **)error
{
  NSRange result = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, options, 0L, self, &range, NULL, error, (void **)((void *)&result));
  return((result.location == NSNotFound) ? NO : YES);
}

// rangeOfRegex:
// Each variant runs an RKLRangeOp and returns the range written by performRegexOp(); it stays NSNotFoundRange
// if nothing is written.

- (NSRange)RKL_METHOD_PREPEND(rangeOfRegex):(NSString *)regex
{
  NSRange result = NSNotFoundRange, range = NSMaxiumRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, 0L, self, &range, NULL, NULL, (void **)((void *)&result));
  return(result);
}

- (NSRange)RKL_METHOD_PREPEND(rangeOfRegex):(NSString *)regex capture:(NSInteger)capture
{
  NSRange result = NSNotFoundRange, range = NSMaxiumRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, capture, self, &range, NULL, NULL, (void **)((void *)&result));
  return(result);
}

- (NSRange)RKL_METHOD_PREPEND(rangeOfRegex):(NSString *)regex inRange:(NSRange)range
{
  NSRange result = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, 0L, self, &range, NULL, NULL, (void **)((void *)&result));
  return(result);
}

- (NSRange)RKL_METHOD_PREPEND(rangeOfRegex):(NSString *)regex options:(RKLRegexOptions)options inRange:(NSRange)range capture:(NSInteger)capture error:(NSError **)error
{
  NSRange result = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, options, capture, self, &range, NULL, error, (void **)((void *)&result));
  return(result);
}

// stringByMatching:
// The first three variants forward to the full options:inRange:capture:error: variant with default arguments.

- (NSString *)RKL_METHOD_PREPEND(stringByMatching):(NSString *)regex
{
  return([self RKL_METHOD_PREPEND(stringByMatching):regex options:RKLNoOptions inRange:NSMaxiumRange capture:0L error:NULL]);
}

- (NSString *)RKL_METHOD_PREPEND(stringByMatching):(NSString *)regex capture:(NSInteger)capture
{
  return([self RKL_METHOD_PREPEND(stringByMatching):regex options:RKLNoOptions inRange:NSMaxiumRange capture:capture error:NULL]);
}

- (NSString *)RKL_METHOD_PREPEND(stringByMatching):(NSString *)regex inRange:(NSRange)range
{
  return([self RKL_METHOD_PREPEND(stringByMatching):regex options:RKLNoOptions inRange:range capture:0L error:NULL]);
}

// Returns the substring of the receiver for the range matched by `capture`, or NULL if there was no match.
- (NSString *)RKL_METHOD_PREPEND(stringByMatching):(NSString *)regex options:(RKLRegexOptions)options inRange:(NSRange)range capture:(NSInteger)capture error:(NSError **)error
{
  NSRange matchedRange = [self RKL_METHOD_PREPEND(rangeOfRegex):regex options:options inRange:range capture:capture error:error];
  return((matchedRange.location == NSNotFound) ? NULL : rkl_CFAutorelease(CFStringCreateWithSubstring(NULL, (CFStringRef)self, CFMakeRange(matchedRange.location, matchedRange.length))));
}

// stringByReplacingOccurrencesOfRegex:
// All three variants hand the work to performRegexOp() with RKLReplaceOp and return its result.

- (NSString *)RKL_METHOD_PREPEND(stringByReplacingOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement
{
  NSRange searchRange = NSMaxiumRange;
  return(performRegexOp(self, _cmd, (RKLRegexOp)RKLReplaceOp, regex, 0, 0L, self, &searchRange, replacement, NULL, NULL));
}

- (NSString *)RKL_METHOD_PREPEND(stringByReplacingOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement range:(NSRange)searchRange
{
  return(performRegexOp(self, _cmd, (RKLRegexOp)RKLReplaceOp, regex, 0, 0L, self, &searchRange, replacement, NULL, NULL));
}

- (NSString *)RKL_METHOD_PREPEND(stringByReplacingOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement options:(RKLRegexOptions)options range:(NSRange)searchRange error:(NSError **)error
{
  return(performRegexOp(self, _cmd, (RKLRegexOp)RKLReplaceOp, regex, options, 0L, self, &searchRange, replacement, error, NULL));
}

@end

@implementation NSMutableString (RegexKitLiteAdditions)

// replaceOccurrencesOfRegex:
// Each variant runs performRegexOp() with (RKLReplaceOp | RKLReplaceMutable) on the receiver and returns the
// replacement count that performRegexOp() stores in `replacedCount`.

- (NSUInteger)RKL_METHOD_PREPEND(replaceOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement
{
  NSRange searchRange = NSMaxiumRange;
  NSUInteger replacedCount = 0;
  performRegexOp(self, _cmd, (RKLRegexOp)(RKLReplaceOp | RKLReplaceMutable), regex, 0, 0L, self, &searchRange, replacement, NULL, (void **)((void *)&replacedCount));
  return(replacedCount);
}

- (NSUInteger)RKL_METHOD_PREPEND(replaceOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement range:(NSRange)searchRange
{
  NSUInteger replacedCount = 0;
  performRegexOp(self, _cmd, (RKLRegexOp)(RKLReplaceOp | RKLReplaceMutable), regex, 0, 0L, self, &searchRange, replacement, NULL, (void **)((void *)&replacedCount));
  return(replacedCount);
}

- (NSUInteger)RKL_METHOD_PREPEND(replaceOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement options:(RKLRegexOptions)options range:(NSRange)searchRange error:(NSError **)error
{
  NSUInteger replacedCount = 0;
  performRegexOp(self, _cmd, (RKLRegexOp)(RKLReplaceOp | RKLReplaceMutable), regex, options, 0L, self, &searchRange, replacement, error, (void **)((void *)&replacedCount));
  return(replacedCount);
}

@end