3 // http://regexkit.sourceforge.net/
4 // Licensed under the terms of the BSD License, as specified below.
8 Copyright (c) 2008, John Engelhart
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright
16 notice, this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the Zang Industries nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
32 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #import <CoreFoundation/CFBase.h>
40 #import <CoreFoundation/CFArray.h>
41 #import <CoreFoundation/CFString.h>
42 #import <Foundation/NSArray.h>
43 #import <Foundation/NSDictionary.h>
44 #import <Foundation/NSError.h>
45 #import <Foundation/NSException.h>
46 #import <Foundation/NSNotification.h>
47 #import <Foundation/NSRunLoop.h>
49 #import <Foundation/NSGarbageCollector.h>
51 #import <libkern/OSAtomic.h>
52 #import <AvailabilityMacros.h>
57 #import "RegexKitLite.h"
59 // Compile time tuneables.
61 #ifndef RKL_CACHE_SIZE
62 #define RKL_CACHE_SIZE 23
65 #ifndef RKL_FIXED_LENGTH
66 #define RKL_FIXED_LENGTH 2048
69 #ifndef RKL_STACK_LIMIT
70 #define RKL_STACK_LIMIT (128 * 1024)
73 #define SCRATCH_BUFFERS 4
75 // These macros are nearly identical to their NSCParameterAssert siblings.
76 // This is required because nearly everything is done while cacheSpinLock is locked.
77 // We need to safely unlock before throwing any of these exceptions.
78 // @try {} @finally {} significantly slows things down so it's not used.
79 #define RKLCAssert(d, ...) RKLCAssertDictionary(__PRETTY_FUNCTION__, __FILE__, __LINE__, (d), ##__VA_ARGS__)
80 #ifdef NS_BLOCK_ASSERTIONS
81 #define _RKLCDelayedAssertBody(c, e, g, d, ...)
83 #define _RKLCDelayedAssertBody(c, e, g, d, ...) do { id *_e=(e); if(*_e!=NULL) { goto g; } if(!(c)) { *_e = RKLCAssert((d), ##__VA_ARGS__); goto g; } } while(0)
84 #endif // NS_BLOCK_ASSERTIONS
85 #define RKLCDelayedAssert(c, e, g) _RKLCDelayedAssertBody(c, e, g, @"Invalid parameter not satisfying: %s", #c)
87 #define RKLRaiseException(e, f, ...) [[NSException exceptionWithName:(e) reason:RKLStringFromClassAndMethod((self), (_cmd), (f), ##__VA_ARGS__) userInfo:NULL] raise]
89 // Ugly macros to keep other parts clean.
// Range helper macros. Arguments are substituted textually, so an argument expression may be evaluated more than once.
// End of a range: location + length.
91 #define NSMaxRange(r) ((r).location + (r).length)
// Non-zero when range `in` lies within range `win`: both the start offset and the end offset of `in`, measured from (win).location, must be <= (win).length.
// NOTE(review): presumably relies on the fields being unsigned, so that an `in` starting before `win` wraps to a huge offset and fails the test - confirm.
92 #define NSRangeInsideRange(in, win) (((((in).location - (win).location) <= (win).length) && ((NSMaxRange(in) - (win).location) <= (win).length)))
// Non-zero when both location and length are equal.
93 #define NSEqualRanges(r1, r2) ((((r1).location == (r2).location) && ((r1).length == (r2).length)))
// Compound-literal constructors; the arguments are cast to the field types.
94 #define NSMakeRange(loc, len) ((NSRange){(NSUInteger)(loc), (NSUInteger)(len)})
95 #define CFMakeRange(loc, len) ((CFRange){ (CFIndex)(loc), (CFIndex)(len)})
// {NSNotFound, 0}: the value used throughout this file for "no range".
96 #define NSNotFoundRange ((NSRange){NSNotFound, 0 })
// {0, NSUIntegerMax}. The spelling "Maxium" is part of the identifier.
97 #define NSMaxiumRange ((NSRange){ 0, NSUIntegerMax})
99 #if defined (__GNUC__) && (__GNUC__ >= 4)
100 #define RKL_PREFETCH(ptr, off) { const char *p = ((const char *)(ptr)) + ((off) + 64); __builtin_prefetch(p); __builtin_prefetch(p + 64); }
102 #define RKL_PREFETCH(ptr, off)
105 // If the gcc flag -mmacosx-version-min is used with, for example, '=10.2', give a warning that the libicucore.dylib is only available on >= 10.3.
106 // If you are reading this comment because of this warning, this is to let you know that linking to /usr/lib/libicucore.dylib will cause your executable to fail on < 10.3.
107 // You will need to build your own version of the ICU library and link to that in order for RegexKitLite to work successfully on < 10.3. This is not simple.
109 #if MAC_OS_X_VERSION_MIN_REQUIRED < 1030
110 #warning The ICU dynamic shared library, /usr/lib/libicucore.dylib, is only available on Mac OS X 10.3 and later.
111 #warning You will need to supply a version of the ICU library to use RegexKitLite on Mac OS X 10.2 and earlier.
// Statement-expression that stores the range of capture group `c` of regex `re` into the lvalue `r`.
// `s` is a pointer to the ICU status word and is passed to uregex_start() / uregex_end().
// When uregex_start() returns -1, r is set to NSNotFoundRange; otherwise r.location is the start and r.length is (end - start).
// The value of the whole expression is *(s), i.e. the status after the calls.
114 #define RKLGetRangeForCapture(re, s, c, r) ({ int32_t start = uregex_start((re), (int32_t)(c), (s)); if(start == -1) { r = NSNotFoundRange; } else { r.location = (NSUInteger)start; r.length = (NSUInteger)uregex_end((re), (int32_t)(c), (s)) - r.location; } *(s); })
116 // Exported symbols. Exception names, error domains, keys, etc.
117 NSString * const RKLICURegexException = @"RKLICURegexException";
119 NSString * const RKLICURegexErrorDomain = @"RKLICURegexErrorDomain";
121 NSString * const RKLICURegexErrorCodeErrorKey = @"RKLICURegexErrorCode";
122 NSString * const RKLICURegexErrorNameErrorKey = @"RKLICURegexErrorName";
123 NSString * const RKLICURegexLineErrorKey = @"RKLICURegexLine";
124 NSString * const RKLICURegexOffsetErrorKey = @"RKLICURegexOffset";
125 NSString * const RKLICURegexPreContextErrorKey = @"RKLICURegexPreContext";
126 NSString * const RKLICURegexPostContextErrorKey = @"RKLICURegexPostContext";
127 NSString * const RKLICURegexRegexErrorKey = @"RKLICURegexRegex";
128 NSString * const RKLICURegexRegexOptionsErrorKey = @"RKLICURegexRegexOptions";
130 // Type / struct definitions
132 typedef struct uregex uregex; // Opaque ICU regex type.
134 #define U_BUFFER_OVERFLOW_ERROR 15
136 #define U_PARSE_CONTEXT_LEN 16
138 typedef struct UParseError {
141 UniChar preContext[U_PARSE_CONTEXT_LEN];
142 UniChar postContext[U_PARSE_CONTEXT_LEN];
150 RKLReplaceMutable = 1 << 4,
152 typedef NSUInteger RKLRegexOp;
162 CFStringRef regexString;
163 RKLRegexOptions options;
165 NSInteger captureCount;
167 CFStringRef setToString;
168 CFHashCode setToHash;
170 NSUInteger setToIsImmutable:1;
171 NSUInteger setToNeedsConversion:1;
172 const UniChar *setToUniChar;
173 NSRange setToRange, lastFindRange, lastMatchRange;
174 NSUInteger pad[1]; // For 32 bits, this makes the struct 64 bytes exactly, which is good for cache line alignment.
177 // ICU functions. See http://www.icu-project.org/apiref/icu4c/uregex_8h.html Tweaked slightly from the originals, but functionally identical.
178 const char *u_errorName (int32_t status);
179 int32_t u_strlen (const UniChar *s);
180 int32_t uregex_appendReplacement (uregex *regexp, const UniChar *replacementText, int32_t replacementLength, UniChar **destBuf, int32_t *destCapacity, int32_t *status);
181 int32_t uregex_appendTail (uregex *regexp, UniChar **destBuf, int32_t *destCapacity, int32_t *status);
182 void uregex_close (uregex *regexp);
183 int32_t uregex_end (uregex *regexp, int32_t groupNum, int32_t *status);
184 BOOL uregex_find (uregex *regexp, int32_t location, int32_t *status);
185 BOOL uregex_findNext (uregex *regexp, int32_t *status);
186 int32_t uregex_groupCount (uregex *regexp, int32_t *status);
187 uregex *uregex_open (const UniChar *pattern, int32_t patternLength, RKLRegexOptions flags, UParseError *parseError, int32_t *status);
188 void uregex_reset (uregex *regexp, int32_t newIndex, int32_t *status);
189 void uregex_setText (uregex *regexp, const UniChar *text, int32_t textLength, int32_t *status);
190 int32_t uregex_split (uregex *regexp, UniChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UniChar *destFields[], int32_t destFieldsCapacity, int32_t *status);
191 int32_t uregex_start (uregex *regexp, int32_t groupNum, int32_t *status);
194 static RKLCacheSlot *getCachedRegex (NSString *regexString, RKLRegexOptions options, NSError **error, id *exception);
195 static BOOL setCacheSlotToString (RKLCacheSlot *cacheSlot, const NSRange *range, int32_t *status, id *exception);
196 static RKLCacheSlot *getCachedRegexSetToString (NSString *regexString, RKLRegexOptions options, NSString *matchString, NSUInteger *matchLengthPtr, NSRange *matchRange, NSError **error, id *exception, int32_t *status);
197 static id performRegexOp (id self, SEL _cmd, RKLRegexOp doRegexOp, NSString *regexString, RKLRegexOptions options, NSInteger capture, id matchString, NSRange *matchRange, NSString *replacementString, NSError **error, void **result);
199 static void rkl_find (RKLCacheSlot *cacheSlot, NSInteger capture, NSRange searchRange, NSRange *resultRange, id *exception, int32_t *status);
200 static NSArray *rkl_splitArray (RKLCacheSlot *cacheSlot, id *exception, int32_t *status);
201 static NSString *rkl_replaceString (RKLCacheSlot *cacheSlot, id searchString, NSUInteger searchU16Length, NSString *replacementString, NSUInteger replacementU16Length, NSUInteger *replacedCount, int replaceMutable, id *exception, int32_t *status);
202 static int32_t rkl_replaceAll (RKLCacheSlot *cacheSlot, const UniChar *replacementUniChar, int32_t replacementU16Length, UniChar *replacedUniChar, int32_t replacedU16Capacity, NSUInteger *replacedCount, id *exception, int32_t *status);
204 static void rkl_clearStringCache (void);
205 static void clearBuffer (RKLBuffer *buffer, int freeDynamicBuffer);
206 static void clearCacheSlotRegex (RKLCacheSlot *cacheSlot);
207 static void clearCacheSlotSetTo (RKLCacheSlot *cacheSlot);
209 static NSDictionary *userInfoDictionary (NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status, ...);
210 static NSError *RKLNSErrorForRegex (NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status);
211 static NSException *RKLNSExceptionForRegex (NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status);
212 static NSDictionary *RKLCAssertDictionary (const char *function, const char *file, int line, NSString *format, ...);
213 static NSString *RKLStringFromClassAndMethod(id object, SEL selector, NSString *format, ...);
216 // If compiled with Garbage Collection, we need to be able to do a few things slightly differently.
217 // The basic premise is that under GC we use a trampoline function pointer which is set to a _first function to catch the first invocation.
218 // The _first function checks if GC is running and then overwrites the function pointer with the appropriate routine. Think of it as 'lazy linking'.
220 // rkl_collectingEnabled uses objc_getClass() to get the NSGarbageCollector class, which doesn't exist on earlier systems.
221 // This allows for graceful failure should we find ourselves running on an earlier version of the OS without NSGarbageCollector.
// rkl_collectingEnabled: function pointer answering "is a garbage collector active?".
// It initially points at rkl_collectingEnabled_first; after the first call it points at one of the constant stubs below.
222 static BOOL rkl_collectingEnabled_first (void);
// Constant stubs installed once the answer is known.
223 static BOOL rkl_collectingEnabled_yes (void) { return(YES); }
224 static BOOL rkl_collectingEnabled_no (void) { return(NO); }
225 static BOOL(*rkl_collectingEnabled) (void) = rkl_collectingEnabled_first;
// First-call resolver: looks the NSGarbageCollector class up by name and asks it for +defaultCollector.
// A non-NULL collector installs the _yes stub, anything else installs the _no stub; the freshly installed stub is then called to produce the return value.
226 static BOOL rkl_collectingEnabled_first (void) { return((([objc_getClass("NSGarbageCollector") defaultCollector]!=NULL) ? (rkl_collectingEnabled=rkl_collectingEnabled_yes) : (rkl_collectingEnabled=rkl_collectingEnabled_no))()); }
// rkl_realloc: function pointer used to (re)size the buffer referenced by *ptr.
// It initially points at rkl_realloc_first, which replaces it with the GC or the standard variant on first use.
228 static void *rkl_realloc_first (void **ptr, size_t size, NSUInteger flags);
// Standard variant: *ptr = reallocf(*ptr, size), and that same pointer is returned. `flags` is ignored.
229 static void *rkl_realloc_std (void **ptr, size_t size, NSUInteger flags) { flags=flags; /*unused*/ return((*ptr = reallocf(*ptr, size))); }
// GC variant.
//   flags != 0: a new block is obtained from NSAllocateCollectable(size, flags) and returned. Any block currently in *ptr is free()d and
//               *ptr is set to NULL, so the new block is NOT stored in *ptr and the old contents are not copied.
//   flags == 0: identical to the standard variant.
230 static void *rkl_realloc_gc (void **ptr, size_t size, NSUInteger flags) { void *p=NULL; if(flags!=0) { p=NSAllocateCollectable((NSUInteger)size,flags); if(*ptr!=NULL) { free(*ptr); *ptr=NULL; } } else { p=*ptr=reallocf(*ptr, size); } return(p); }
231 static void *(*rkl_realloc) (void **ptr, size_t size, NSUInteger flags) = rkl_realloc_first;
// First-call resolver: picks rkl_realloc_gc when rkl_collectingEnabled() returns YES, rkl_realloc_std otherwise, stores the choice in rkl_realloc and forwards this call to it.
232 static void *rkl_realloc_first (void **ptr, size_t size, NSUInteger flags) { return(((rkl_collectingEnabled()==YES) ? (rkl_realloc=rkl_realloc_gc) : (rkl_realloc=rkl_realloc_std))(ptr, size, flags)); }
// rkl_CFAutorelease: function pointer that hands a CF object over as an `id`.
// It initially points at rkl_CFAutorelease_first, which replaces it with the GC or the standard variant on first use.
234 static id rkl_CFAutorelease_first (CFTypeRef obj);
// Standard variant: sends -autorelease to the object and returns the result.
235 static id rkl_CFAutorelease_std (CFTypeRef obj) { return([(id)obj autorelease]); }
// GC variant: passes the object through CFMakeCollectable() instead of autoreleasing it.
236 static id rkl_CFAutorelease_gc (CFTypeRef obj) { return((id)CFMakeCollectable(obj)); }
237 static id(*rkl_CFAutorelease) (CFTypeRef obj) = rkl_CFAutorelease_first;
// First-call resolver: chooses the variant from rkl_collectingEnabled(), stores it in rkl_CFAutorelease and forwards this call to it.
238 static id rkl_CFAutorelease_first (CFTypeRef obj) { return(((rkl_collectingEnabled()==YES) ? (rkl_CFAutorelease=rkl_CFAutorelease_gc) : (rkl_CFAutorelease=rkl_CFAutorelease_std))(obj)); }
240 #else // __OBJC_GC__ not defined
// Builds without __OBJC_GC__: plain functions with the same names and signatures as the trampolines used in the GC build.
// rkl_realloc: *ptr = reallocf(*ptr, size), returning that pointer; `flags` is ignored.
242 static void *rkl_realloc (void **ptr, size_t size, NSUInteger flags) { flags=flags; /*unused*/ return((*ptr = reallocf(*ptr, size))); }
// rkl_CFAutorelease: sends -autorelease to the object and returns the result.
243 static id rkl_CFAutorelease (CFTypeRef obj) { return([(id)obj autorelease]); }
245 #endif // __OBJC_GC__
247 #ifdef RKL_FAST_MUTABLE_CHECK
248 // We use a trampoline function pointer to check at run time if the function __CFStringIsMutable is available.
249 // If it is, the trampoline function pointer is replaced with the address of that function.
250 // Otherwise, we assume the worst case that every string is mutable.
251 // This hopefully helps to protect us since we're using an undocumented, non-public API call.
252 // We will keep on working if it ever does go away, just with a bit less performance due to the overhead of mutable checks.
// rkl_CFStringIsMutable: function pointer, initially rkl_CFStringIsMutable_first.
253 static BOOL rkl_CFStringIsMutable_first (CFStringRef str);
// Fallback used when the symbol lookup fails: reports every string as mutable.
254 static BOOL rkl_CFStringIsMutable_yes (CFStringRef str) { str=str; /*unused*/ return(YES); }
255 static BOOL(*rkl_CFStringIsMutable) (CFStringRef str) = rkl_CFStringIsMutable_first;
// First-call resolver: stores the result of dlsym(RTLD_DEFAULT, "__CFStringIsMutable") in the pointer, substitutes the _yes fallback when
// the lookup returns NULL, then answers this call through the pointer that was just installed.
256 static BOOL rkl_CFStringIsMutable_first (CFStringRef str) { if((rkl_CFStringIsMutable = dlsym(RTLD_DEFAULT, "__CFStringIsMutable")) == NULL) { rkl_CFStringIsMutable = rkl_CFStringIsMutable_yes; } return(rkl_CFStringIsMutable(str)); }
257 #else // RKL_FAST_MUTABLE_CHECK is not defined. Assume that all strings are potentially mutable.
258 #define rkl_CFStringIsMutable(s) (YES)
260 BOOL __CFStringIsMutable(CFStringRef str);
262 // Translation unit scope global variables.
264 static UniChar fixedUniChar[(RKL_FIXED_LENGTH)]; // This is the fixed sized UTF-16 conversion buffer.
265 static RKLCacheSlot RKLCache[(RKL_CACHE_SIZE)], *lastCacheSlot;
266 static OSSpinLock cacheSpinLock = OS_SPINLOCK_INIT;
267 static RKLBuffer dynamicBuffer, fixedBuffer = {NULL, 0UL, 0L, &fixedUniChar[0]};
268 static const UniChar emptyUniCharString[1]; // For safety, icu_regexes are 'set' to this when the string they were searched is cleared.
269 static void *scratchBuffer[(SCRATCH_BUFFERS)]; // Used to hold temporary allocations that are allocated via reallocf().
271 // These are used when running under manual memory management for the array that rkl_splitArray creates.
272 // The split strings are created, but not autoreleased. The (immutable) array is created using these callbacks, which skips the CFRetain() call.
273 // For each split string this saves the overhead of an autorelease, then an array retain, then an autorelease pool release. This is good for a ~30% speed increase.
// Element equality callback: defers to CFEqual().
274 static Boolean RKLCFArrayEqualCallBack (const void *value1, const void *value2) { return(CFEqual(value1, value2)); }
// Element release callback: CFRelease()s the element; the allocator argument is not used.
275 static void RKLCFArrayRelease (CFAllocatorRef allocator, const void *ptr) { allocator=allocator;/*unused*/ CFRelease(ptr); }
// Callback table with a release and an equal callback, but NULL in the other two callback positions.
// NOTE(review): per the CFArrayCallBacks layout the NULL entries should be `retain` and `copyDescription` - confirm against the CoreFoundation header.
276 static CFArrayCallBacks transferOwnershipArrayCallBacks = { 0, NULL, RKLCFArrayRelease, NULL, RKLCFArrayEqualCallBack };
278 #if defined(RKL_REGISTER_FOR_IPHONE_LOWMEM_NOTIFICATIONS) && (RKL_REGISTER_FOR_IPHONE_LOWMEM_NOTIFICATIONS == 1)
280 // The next few lines are specifically for the iPhone to catch low memory conditions.
281 // The basic idea is that rkl_RegisterForLowMemoryNotifications() is set to be run once by the linker at load time via __attribute((constructor)).
282 // rkl_RegisterForLowMemoryNotifications() tries to find the iPhone low memory notification symbol. If it can find it,
283 // it registers with the default NSNotificationCenter to call the RKLLowMemoryWarningObserver class method +lowMemoryWarning:.
284 // rkl_RegisterForLowMemoryNotifications() uses an atomic compare and swap to guarantee that it initializes exactly once.
285 // +lowMemoryWarning tries to acquire the cache lock. If it gets the lock, it clears the cache. If it can't, it calls performSelector:
286 // with a delay of half a second to try again. This will hopefully prevent any deadlocks, such as a RegexKitLite request for
287 // memory triggering a notification while the lock is held.
289 static void rkl_RegisterForLowMemoryNotifications(void);
// Class whose only visible member is the class method +lowMemoryWarning:, used as the notification callback.
// NOTE(review): the closing brace of the method and the @end of the @implementation are not visible in this excerpt.
291 @interface RKLLowMemoryWarningObserver : NSObject +(void)lowMemoryWarning:(id)notification; @end
292 @implementation RKLLowMemoryWarningObserver
// Tries to take cacheSpinLock without blocking. On success the string cache is cleared via rkl_clearStringCache() and the lock released.
// On failure the same selector is re-scheduled on the class with a 0.5 second delay. The `notification` argument is not used.
293 +(void)lowMemoryWarning:(id)notification {
294 if(OSSpinLockTry(&cacheSpinLock)) { rkl_clearStringCache(); OSSpinLockUnlock(&cacheSpinLock); }
295 else { [[RKLLowMemoryWarningObserver class] performSelector:@selector(lowMemoryWarning:) withObject:NULL afterDelay:0.5]; }
// Guard flag: 0 until the registration routine has run, flipped to 1 by the compare-and-swap below.
299 static int rkl_HaveRegisteredForLowMemoryNotifications = 0;
// Marked __attribute__((constructor)). Registers RKLLowMemoryWarningObserver for the UIKit memory-warning notification when that symbol can be resolved.
// NOTE(review): the closing braces of this function are not visible in this excerpt.
301 __attribute__((constructor)) static void rkl_RegisterForLowMemoryNotifications(void) {
302 void **memoryWarningNotification = NULL;
// Only the caller that swaps the flag from 0 to 1 proceeds.
304 if(OSAtomicCompareAndSwapIntBarrier(0, 1, &rkl_HaveRegisteredForLowMemoryNotifications)) {
// The notification name is looked up with dlsym() rather than referenced directly; if the symbol is absent nothing is registered.
305 if((memoryWarningNotification = dlsym(RTLD_DEFAULT, "UIApplicationDidReceiveMemoryWarningNotification")) != NULL) {
// dlsym() yields the address of the name variable, hence the dereference to obtain the name itself.
306 [[NSNotificationCenter defaultCenter] addObserver:[RKLLowMemoryWarningObserver class] selector:@selector(lowMemoryWarning:) name:*memoryWarningNotification object:NULL];
313 // IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
314 // IMPORTANT! Should only be called with cacheSpinLock already locked!
// Finds, or compiles and caches, the ICU regex for (regexString, options) in RKLCache.
//   regexString  Pattern; a NULL pattern trips the delayed assert (stored through `exception`).
//   options      Passed to uregex_open() and compared against the cached slot's options.
//   error        Optional. Written only when status > 0, with the result of RKLNSErrorForRegex().
//   exception    Receives the delayed-assert dictionary on an assertion failure.
// The visible success paths return the cache slot and record it in lastCacheSlot.
// NOTE(review): the declaration of `status`, the `exitNow:` label, the closing brace of the conversion block and the final return are
// not visible in this excerpt; presumably the function ends by returning cacheSlot - confirm.
317 static RKLCacheSlot *getCachedRegex(NSString *regexString, RKLRegexOptions options, NSError **error, id *exception) {
318 RKLCacheSlot *cacheSlot = NULL;
319 CFHashCode regexHash = 0;
322 RKLCDelayedAssert(regexString != NULL, exception, exitNow);
324 // Fast path the common case where this regex is exactly the same one used last time.
// Pointer equality only: no hashing and no string comparison on this path.
325 if((lastCacheSlot != NULL) && (lastCacheSlot->options == options) && (lastCacheSlot->icu_regex != NULL) && (lastCacheSlot->regexString != NULL) && (lastCacheSlot->regexString == (CFStringRef)regexString)) { return(lastCacheSlot); }
// The slot is selected by hash modulo RKL_CACHE_SIZE, so a different regex mapping to the same slot replaces the previous occupant.
327 regexHash = CFHash((CFTypeRef)regexString);
328 cacheSlot = &RKLCache[(regexHash % RKL_CACHE_SIZE)]; // Retrieve the cache slot for this regex.
330 // Return the cached entry if it's a match, otherwise clear the slot and create a new ICU regex in its place.
331 if((cacheSlot->options == options) && (cacheSlot->icu_regex != NULL) && (cacheSlot->regexString != NULL) && ((cacheSlot->regexString == (CFStringRef)regexString) || (CFEqual((CFTypeRef)regexString, cacheSlot->regexString) == YES))) { lastCacheSlot = cacheSlot; return(cacheSlot); }
333 clearCacheSlotRegex(cacheSlot);
335 if((cacheSlot->regexString = CFStringCreateCopy(NULL, (CFStringRef)regexString)) == NULL) { goto exitNow; } ; // Get a cheap immutable copy.
336 cacheSlot->options = options;
338 CFIndex regexStringU16Length = CFStringGetLength(cacheSlot->regexString); // In UTF16 code units.
339 UParseError parseError = (UParseError){-1, -1, {0}, {0}};
340 UniChar *regexUniChar = NULL;
342 // Try to quickly obtain regexString in UTF16 format.
343 if((regexUniChar = (UniChar *)CFStringGetCharactersPtr(cacheSlot->regexString)) == NULL) { // We didn't get the UTF16 pointer quickly and need to perform a full conversion in a temp buffer.
// Patterns needing fewer than RKL_STACK_LIMIT bytes are converted on the stack; larger ones go through scratchBuffer[0].
344 if((regexStringU16Length * sizeof(UniChar)) < RKL_STACK_LIMIT) { if((regexUniChar = alloca(regexStringU16Length * sizeof(UniChar))) == NULL) { goto exitNow; } } // Try to use the stack.
345 else { if((regexUniChar = rkl_realloc(&scratchBuffer[0], regexStringU16Length * sizeof(UniChar), 0UL)) == NULL) { goto exitNow; } } // Otherwise use the heap.
346 CFStringGetCharacters(cacheSlot->regexString, CFMakeRange(0, regexStringU16Length), (UniChar *)regexUniChar); // Convert regexString to UTF16.
349 // Create the ICU regex.
350 if((cacheSlot->icu_regex = uregex_open(regexUniChar, (int32_t)regexStringU16Length, options, &parseError, &status)) == NULL) { goto exitNow; }
// status <= 0 is treated as success here; only then is the capture count fetched and the slot remembered as lastCacheSlot.
351 if(status <= 0) { cacheSlot->captureCount = (NSInteger)uregex_groupCount(cacheSlot->icu_regex, &status); }
352 if(status <= 0) { lastCacheSlot = cacheSlot; }
// scratchBuffer[0] is only needed while converting the pattern; release it if the heap path was taken.
355 if(scratchBuffer[0] != NULL) { free(scratchBuffer[0]); scratchBuffer[0] = NULL; }
// On an ICU error the caller gets a NULL slot and, if requested, an NSError built from the parse error details.
356 if(status > 0) { cacheSlot = NULL; if(error != NULL) { *error = RKLNSErrorForRegex(regexString, options, &parseError, status); } }
360 // IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
361 // IMPORTANT! Should only be called with cacheSpinLock already locked!
// Makes sure the cache slot has UTF-16 text for its setToString and that the slot's ICU regex is set to the sub-range *range of that text.
//   cacheSlot  Slot whose setToString / setToLength / setToHash have already been filled in.
//   range      Range of the string the regex should be set to; must lie within setToLength (asserted).
//   status     ICU status word, updated by uregex_setText().
//   exception  Receives the delayed-assert dictionary on an assertion failure.
// Returns NO when the dynamic buffer cannot be resized, when CFRetain() yields NULL, or when uregex_setText() leaves *status > 0.
// NOTE(review): the `setRegexText:` and `exitNow:` labels, several closing braces and the remaining return statements are not visible
// in this excerpt; presumably the normal exit returns YES - confirm.
364 static BOOL setCacheSlotToString(RKLCacheSlot *cacheSlot, const NSRange *range, int32_t *status, id *exception) {
365 RKLCDelayedAssert((cacheSlot != NULL) && (cacheSlot->setToString != NULL) && (range != NULL) && (status != NULL), exception, exitNow);
// No conversion needed: skip the buffer handling entirely.
367 if(cacheSlot->setToNeedsConversion == NO) { goto setRegexText; }
// Strings shorter than RKL_FIXED_LENGTH use the fixed buffer; longer ones use the resizable dynamic buffer.
369 RKLBuffer *buffer = (cacheSlot->setToLength < RKL_FIXED_LENGTH) ? &fixedBuffer : &dynamicBuffer;
// The buffer is considered to already hold this string when it records the same string pointer, or the same length and hash.
370 if((cacheSlot->setToUniChar != NULL) && ((cacheSlot->setToString == buffer->string) || ((cacheSlot->setToLength == buffer->length) && (cacheSlot->setToHash == buffer->hash)))) { goto setRegexText; }
372 clearBuffer(buffer, NO);
374 if(cacheSlot->setToLength >= RKL_FIXED_LENGTH) {
375 RKLCDelayedAssert(buffer == &dynamicBuffer, exception, exitNow);
376 if((dynamicBuffer.uniChar = rkl_realloc((void *)&dynamicBuffer.uniChar, (cacheSlot->setToLength * sizeof(UniChar)), 0UL)) == NULL) { return(NO); } // Resize the buffer.
378 RKLCDelayedAssert(buffer->uniChar != NULL, exception, exitNow);
379 CFStringGetCharacters(cacheSlot->setToString, CFMakeRange(0, cacheSlot->setToLength), (UniChar *)buffer->uniChar); // Convert to a UTF16 string.
// Record which string the buffer now holds so later calls can detect a buffer hit.
381 if((buffer->string = CFRetain(cacheSlot->setToString)) == NULL) { return(NO); }
382 buffer->hash = cacheSlot->setToHash;
383 buffer->length = cacheSlot->setToLength;
// The slot now points at the converted text; resetting setToRange forces the uregex_setText() call below.
385 cacheSlot->setToUniChar = buffer->uniChar;
386 cacheSlot->setToRange = NSNotFoundRange;
// Only re-set the regex text when the requested range differs from the one the regex is currently set to.
390 if(NSEqualRanges(cacheSlot->setToRange, *range) == NO) {
391 RKLCDelayedAssert((cacheSlot->icu_regex != NULL) && (cacheSlot->setToUniChar != NULL) && (NSMaxRange(*range) <= (NSUInteger)cacheSlot->setToLength), exception, exitNow);
// Cached find/match results belong to the previous text range and are discarded.
392 cacheSlot->lastFindRange = cacheSlot->lastMatchRange = NSNotFoundRange;
393 cacheSlot->setToRange = *range;
394 uregex_setText(cacheSlot->icu_regex, cacheSlot->setToUniChar + cacheSlot->setToRange.location, (int32_t)cacheSlot->setToRange.length, status);
395 if(*status > 0) { return(NO); }
404 // IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
405 // IMPORTANT! Should only be called with cacheSpinLock already locked!
// Obtains the cache slot for (regexString, options) and makes sure its regex is set to *matchRange of matchString.
//   matchLengthPtr  Receives the length of matchString in UTF-16 code units.
//   matchRange      In/out. A length of NSUIntegerMax is replaced by the string length.
//   error           Forwarded to getCachedRegex().
//   exception       Receives an NSRangeException when *matchRange extends past the string, or an assert dictionary on internal failures.
//   status          ICU status word, forwarded to setCacheSlotToString().
// NOTE(review): the `exitNow:` label, several closing braces and the final return are not visible in this excerpt; presumably the
// function returns cacheSlot (set to NULL on the visible failure path at the end) - confirm.
408 static RKLCacheSlot *getCachedRegexSetToString(NSString *regexString, RKLRegexOptions options, NSString *matchString, NSUInteger *matchLengthPtr, NSRange *matchRange, NSError **error, id *exception, int32_t *status) {
409 RKLCacheSlot *cacheSlot = NULL;
410 RKLCDelayedAssert((regexString != NULL) && (exception != NULL) && (status != NULL), exception, exitNow);
412 // Fast path the common case where this regex is exactly the same one used last time.
413 if((lastCacheSlot != NULL) && (lastCacheSlot->regexString == (CFStringRef)regexString) && (lastCacheSlot->options == options)) { cacheSlot = lastCacheSlot; }
414 else { if((cacheSlot = getCachedRegex(regexString, options, error, exception)) == NULL) { goto exitNow; } }
416 // Optimize the case where the string to search (matchString) is immutable and the setToString immutable copy is the same string with its reference count incremented.
417 BOOL isSetTo = ((cacheSlot->setToString != NULL) && (cacheSlot->setToString == (CFStringRef)matchString)) ? YES : NO;
// The cached length is trusted only for a pointer-identical string flagged immutable; otherwise the length is queried again.
418 CFIndex matchLength = ((isSetTo == YES) && (cacheSlot->setToIsImmutable == YES)) ? cacheSlot->setToLength : CFStringGetLength((CFStringRef)matchString);
420 *matchLengthPtr = (NSUInteger)matchLength;
421 if(matchRange->length == NSUIntegerMax) { matchRange->length = matchLength; } // For convenience, allow NSUIntegerMax == string length.
423 if((NSUInteger)matchLength < NSMaxRange(*matchRange)) { *exception = [NSException exceptionWithName:NSRangeException reason:@"Range or index out of bounds" userInfo:NULL]; goto exitNow; }
// A string not flagged immutable is re-validated: if its current length or hash no longer matches what the slot recorded, the slot is stale.
425 if((cacheSlot->setToIsImmutable == NO) && (cacheSlot->setToString != NULL) && ((cacheSlot->setToLength != CFStringGetLength(cacheSlot->setToString)) || (cacheSlot->setToHash != CFHash(cacheSlot->setToString)))) { isSetTo = NO; }
426 else { // If the first pointer equality check failed, check the hash and length.
427 if(((isSetTo == NO) || (cacheSlot->setToIsImmutable == NO)) && (cacheSlot->setToString != NULL)) { isSetTo = ((cacheSlot->setToLength == matchLength) && (cacheSlot->setToHash == CFHash((CFStringRef)(matchString)))); }
429 if((isSetTo == YES)) { // Make sure that the UTF16 conversion cache is set to this string, if conversion is required.
430 if((cacheSlot->setToNeedsConversion == YES) && (setCacheSlotToString(cacheSlot, matchRange, status, exception) == NO)) { *exception = RKLCAssert(@"Failed to set up UTF16 buffer."); goto exitNow; }
431 if(NSEqualRanges(cacheSlot->setToRange, *matchRange) == YES) { goto exitNow; } // Verify that the range to search is what the cached regex was prepped for last time.
435 // Sometimes the range that the regex is set to isn't right, in which case we don't want to clear the cache slot. Otherwise, flush it out.
436 if((cacheSlot->setToString != NULL) && (isSetTo == NO)) { clearCacheSlotSetTo(cacheSlot); }
// Slot has no string attached (never set, or just flushed): retain matchString and record everything needed to recognise it next time.
438 if(cacheSlot->setToString == NULL) {
439 cacheSlot->setToString = CFRetain(matchString);
440 RKLCDelayedAssert(cacheSlot->setToString != NULL, exception, exitNow);
// A NULL characters pointer means the string has to be converted to UTF-16 into one of the conversion buffers.
441 cacheSlot->setToUniChar = CFStringGetCharactersPtr(cacheSlot->setToString);
442 cacheSlot->setToNeedsConversion = (cacheSlot->setToUniChar == NULL) ? YES : NO;
443 cacheSlot->setToIsImmutable = !rkl_CFStringIsMutable(cacheSlot->setToString); // If RKL_FAST_MUTABLE_CHECK is not defined then the result is '0', or in other words mutable..
444 cacheSlot->setToHash = CFHash(cacheSlot->setToString);
445 cacheSlot->setToRange = NSNotFoundRange;
446 cacheSlot->setToLength = matchLength;
// Finally point the ICU regex at the requested range; a failure here yields a NULL slot.
449 if(setCacheSlotToString(cacheSlot, matchRange, status, exception) == NO) { cacheSlot = NULL; goto exitNow; }
455 // IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
// Common worker for the regex operations: validates arguments, takes cacheSpinLock, runs the requested operation, releases the lock and
// only then raises any pending exception.
//   self, _cmd         Receiver and selector of the calling method; used only to format exception reasons.
//   doRegexOp          Operation code, optionally OR'ed with RKLReplaceMutable; the code itself is (doRegexOp & RKLMaskOp).
//   regexString        Pattern. NULL raises NSInvalidArgumentException.
//   options            Regex options, forwarded to the cache lookup.
//   capture            Capture group number, used by the RKLRangeOp case.
//   matchString        String to operate on. NULL raises NSInternalInconsistencyException.
//   matchRange         Optional search range; when NULL, searchRange starts out as NSNotFoundRange.
//   replacementString  Required (non-NULL) for RKLReplaceOp.
//   error              Optional; reset to NULL on entry if it held a value, then forwarded to the cache lookup.
//   result             Out parameter: treated as NSRange * for RKLRangeOp and as NSUInteger * (replaced count) for RKLReplaceOp.
// Returns resultObject, which the visible code assigns only in the split and replace cases (NULL otherwise).
// NOTE(review): the declarations of `status` and `exception`, the `switch` header, the `exitNow:` label and several closing braces are
// not visible in this excerpt.
458 static id performRegexOp(id self, SEL _cmd, RKLRegexOp doRegexOp, NSString *regexString, RKLRegexOptions options, NSInteger capture, id matchString, NSRange *matchRange, NSString *replacementString, NSError **error, void **result) {
459 BOOL replaceMutable = ((doRegexOp & RKLReplaceMutable) != 0) ? YES : NO;
460 RKLRegexOp regexOp = (doRegexOp & RKLMaskOp);
462 if((error != NULL) && (*error != NULL)) { *error = NULL; }
// Argument checks happen before the lock is taken, so raising directly is safe here.
464 if(regexString == NULL) { RKLRaiseException(NSInvalidArgumentException, @"The regular expression argument is NULL."); }
465 if(matchString == NULL) { RKLRaiseException(NSInternalInconsistencyException, @"The match string argument is NULL."); }
466 if((regexOp == RKLReplaceOp) && (replacementString == NULL)) { RKLRaiseException(NSInvalidArgumentException, @"The replacement string argument is NULL."); }
468 NSUInteger stringU16Length = 0UL, replacementU16Length = (NSUInteger)((replacementString != NULL) ? CFStringGetLength((CFStringRef)replacementString) : 0); // In UTF16 code units.
469 NSRange stringRange = NSMakeRange(0, NSUIntegerMax), searchRange = (matchRange != NULL) ? *matchRange : NSNotFoundRange;
470 RKLCacheSlot *cacheSlot = NULL;
472 id resultObject = NULL;
475 // IMPORTANT! Once we have obtained the lock, code MUST exit via 'goto exitNow;' to unlock the lock! NO EXCEPTIONS!
477 OSSpinLockLock(&cacheSpinLock); // Grab the lock and get cache entry.
// For RKLRangeOp the regex is set to the whole string (stringRange) and searchRange is applied later by rkl_find(); the other
// operations set the regex directly to searchRange.
479 if(((cacheSlot = getCachedRegexSetToString(regexString, options, matchString, &stringU16Length, (regexOp == RKLRangeOp) ? &stringRange : &searchRange, error, &exception, &status)) == NULL) || (exception != NULL) || (status > 0)) { goto exitNow; }
481 if(searchRange.length == NSUIntegerMax) { searchRange.length = stringU16Length; } // For convenience.
482 if(stringU16Length < NSMaxRange(searchRange)) { exception = [NSException exceptionWithName:NSRangeException reason:@"Range or index out of bounds" userInfo:NULL]; goto exitNow; }
484 RKLCDelayedAssert((cacheSlot->icu_regex != NULL) && (exception == NULL), &exception, exitNow);
// Sanity check: when the string needed conversion, the conversion buffer must still describe this slot's string.
486 if(cacheSlot->setToNeedsConversion != 0) {
487 RKLBuffer *buffer = (cacheSlot->setToLength < RKL_FIXED_LENGTH) ? &fixedBuffer : &dynamicBuffer;
488 RKLCDelayedAssert((cacheSlot->setToHash == buffer->hash) && (cacheSlot->setToLength == buffer->length) && (cacheSlot->setToUniChar == buffer->uniChar), &exception, exitNow);
// Dispatch on the operation code.
492 case RKLRangeOp: rkl_find(cacheSlot, capture, searchRange, (NSRange *)result, &exception, &status); break;
493 case RKLSplitOp: resultObject = rkl_splitArray(cacheSlot, &exception, &status); break;
494 case RKLReplaceOp: resultObject = rkl_replaceString(cacheSlot, matchString, stringU16Length, replacementString, replacementU16Length, (NSUInteger *)result, replaceMutable, &exception, &status); break;
495 default: exception = RKLCAssert(@"Unknown regexOp code."); break;
499 OSSpinLockUnlock(&cacheSpinLock);
// The lock has been released; pending problems can now be raised safely.
501 if((status > 0) && (exception == NULL)) { exception = RKLNSExceptionForRegex(regexString, options, NULL, status); } // If we had a problem, throw an exception.
502 if(exception != NULL) {
// `exception` is either a real NSException (re-raised with the class and method prepended to its reason) or the dictionary produced
// by a delayed assert, which is handed to the current NSAssertionHandler.
503 if([exception isKindOfClass:[NSException class]]) { [[NSException exceptionWithName:[exception name] reason:RKLStringFromClassAndMethod(self, _cmd, [exception reason]) userInfo:[exception userInfo]] raise]; }
504 else { [[NSAssertionHandler currentHandler] handleFailureInFunction:[exception objectForKey:@"function"] file:[exception objectForKey:@"file"] lineNumber:[[exception objectForKey:@"line"] longValue] description:[exception objectForKey:@"description"]]; }
506 if(replaceMutable == YES) { // We're working on a mutable string and if there were successful matches with replaced text we still have work to do. Done outside the cache lock.
// `result` is read as the replaced count here; the mutable string is only touched when at least one replacement was made.
507 if(*((NSUInteger *)result) > 0) { NSCParameterAssert(resultObject != NULL); [matchString replaceCharactersInRange:searchRange withString:resultObject]; }
510 return(resultObject);
513 // IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
514 // IMPORTANT! Should only be called from performRegexOp().
// Finds the next match at or after searchRange.location and stores the range of the requested capture group in *resultRange.
//   cacheSlot    Slot whose regex is already set to the text to search.
//   capture      Capture group number; must be between 0 and cacheSlot->captureCount, otherwise an NSInvalidArgumentException is stored
//                in *exception.
//   searchRange  Range the match must fall inside.
//   resultRange  Out parameter for the capture range.
//   exception    Receives an NSException or a delayed-assert dictionary on failure.
//   status       ICU status word.
// captureRange starts as NSNotFoundRange and is only overwritten after a successful find.
// NOTE(review): the `exitNow:` label and closing braces are not visible in this excerpt; presumably the label precedes the final store
// to *resultRange so that failed finds report NSNotFoundRange - confirm.
517 static void rkl_find(RKLCacheSlot *cacheSlot, NSInteger capture, NSRange searchRange, NSRange *resultRange, id *exception, int32_t *status) {
518 NSRange captureRange = NSNotFoundRange;
520 RKLCDelayedAssert((cacheSlot != NULL) && (resultRange != NULL) && (exception != NULL) && (status != NULL), exception, exitNow);
522 if((capture < 0) || (capture > cacheSlot->captureCount)) { *exception = [NSException exceptionWithName:NSInvalidArgumentException reason:@"The capture argument is not valid." userInfo:NULL]; goto exitNow; }
524 if((NSEqualRanges(searchRange, cacheSlot->lastFindRange) == NO)) { // Only perform an expensive 'find' operation iff the current find range is different than the last find range.
525 RKL_PREFETCH(cacheSlot->setToUniChar, searchRange.location << 1); // Spool up the CPU caches.
527 // Using uregex_findNext can be a slight performance win.
// findNext is used when the search starts exactly where the previous match ended (one position further after an empty match).
528 BOOL useFindNext = (searchRange.location == (NSMaxRange(cacheSlot->lastMatchRange) + ((cacheSlot->lastMatchRange.length == 0) ? 1 : 0))) ? YES : NO;
530 cacheSlot->lastFindRange = NSNotFoundRange; // Cleared the cached search/find range.
531 if(useFindNext == NO) { if((uregex_find (cacheSlot->icu_regex, (int32_t)searchRange.location, status) == NO) || (*status > 0)) { goto exitNow; } }
532 else { if((uregex_findNext(cacheSlot->icu_regex, status) == NO) || (*status > 0)) { goto exitNow; } }
// Capture 0 (the whole match) is stored in lastMatchRange; a non-zero status from the lookup aborts.
534 if(RKLGetRangeForCapture(cacheSlot->icu_regex, status, 0, cacheSlot->lastMatchRange) != 0) { goto exitNow; }
535 if(NSRangeInsideRange(cacheSlot->lastMatchRange, searchRange) == NO) { goto exitNow; } // If the regex matched outside the requested range, exit.
537 cacheSlot->lastFindRange = searchRange; // Cache the successful search/find range.
// Capture 0 is answered from the cached whole-match range; other groups are queried from ICU.
540 if(capture == 0) { captureRange = cacheSlot->lastMatchRange; } else { RKLGetRangeForCapture(cacheSlot->icu_regex, status, capture, captureRange); }
543 *resultRange = captureRange;
546 // IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
547 // IMPORTANT! Should only be called from performRegexOp().
// rkl_splitArray: splits the text currently set on the cache slot around every match of the slot's regex and builds
// an NSArray of the resulting substrings. Ranges of the regex's capture groups are collected alongside the pieces.
//   cacheSlot - cache entry supplying icu_regex, captureCount, setToString, setToRange, setToUniChar and setToLength.
//   exception - out: handed to RKLCDelayedAssert for reporting internal consistency failures.
//   status    - in/out: ICU status code handed to the uregex_* calls.
// resultArray starts out as NULL and is only assigned on the empty-text path and after all substrings were created.
// NOTE(review): several lines of this function (declarations of tempRange / capture, index increments, the exitNow
// label and the return statement) are not visible in this excerpt.
550 static NSArray *rkl_splitArray(RKLCacheSlot *cacheSlot, id *exception, int32_t *status) {
551 NSArray *resultArray = NULL;
553 RKLCDelayedAssert((cacheSlot != NULL) && (status != NULL), exception, exitNow);
555 const char *setToUniCharChar = (const char *)(cacheSlot->setToUniChar + cacheSlot->setToRange.location);
// Initial guess for the number of NSRange entries: a quarter of the stack limit plus head room for one full set of
// capture ranges.
556 NSUInteger splitRangesCapacity = ((((RKL_STACK_LIMIT / sizeof(NSRange)) / 4) + ((cacheSlot->captureCount + 1) * 2)) + 2), splitRangesIndex = 0, lastLocation = 0, x = 0;
557 size_t splitRangesSize = (splitRangesCapacity * sizeof(NSRange)), stackUsed = 0;
558 NSInteger captureCount = cacheSlot->captureCount;
559 uregex *icu_regex = cacheSlot->icu_regex;
560 NSRange *splitRanges = NULL;
561 BOOL copiedStackToHeap = NO;
563 if(cacheSlot->setToLength == 0) { resultArray = [NSArray array]; goto exitNow; } // Return an empty array when there is nothing to search.
// The ranges buffer lives on the stack when it fits under RKL_STACK_LIMIT, otherwise in scratchBuffer[0].
565 if(splitRangesSize < RKL_STACK_LIMIT) { if((splitRanges = alloca(splitRangesSize)) == NULL) { goto exitNow; } stackUsed += splitRangesSize; }
566 else { if((splitRanges = rkl_realloc(&scratchBuffer[0], splitRangesSize, 0UL)) == NULL) { goto exitNow; } }
568 cacheSlot->lastFindRange = cacheSlot->lastMatchRange = NSNotFoundRange; // Clear the cached find information for this regex so a subsequent find works correctly.
569 uregex_reset(icu_regex, 0, status); // Reset the regex to the start of the string.
571 for(splitRangesIndex = 0; splitRangesIndex < splitRangesCapacity; splitRangesIndex++) {
573 if(splitRangesIndex >= ((splitRangesCapacity - ((captureCount + 1) * 2)) - 1)) { // Check if we need to grow our NSRanges buffer.
// Grow by half of the current capacity plus room for one more full set of capture ranges.
574 NSUInteger newCapacity = (((splitRangesCapacity + (splitRangesCapacity / 2)) + ((captureCount + 1) * 2)) + 2);
575 size_t newSize = (newCapacity * sizeof(NSRange));
576 NSRange *newRanges = NULL;
578 if((newRanges = rkl_realloc(&scratchBuffer[0], newSize, 0UL)) == NULL) { goto exitNow; } // We only try to use the stack the first time, after that, we use the heap.
// The ranges gathered in the alloca() buffer are copied into the heap buffer exactly once.
579 if((stackUsed > 0) && (copiedStackToHeap == NO)) { memcpy(newRanges, splitRanges, splitRangesSize); copiedStackToHeap = YES; }
581 splitRangesCapacity = newCapacity;
582 splitRangesSize = newSize;
583 splitRanges = newRanges;
586 RKL_PREFETCH(setToUniCharChar, lastLocation << 1); // Spool up the CPU caches.
// Index of the piece that precedes this match; unmatched captures fall back to its location below.
588 NSUInteger baseMatchIndex = splitRangesIndex;
// No further match (or an ICU error) ends the scan.
591 if((uregex_findNext(icu_regex, status) == NO) || (*status > 0)) { break; }
592 if(RKLGetRangeForCapture(icu_regex, status, 0, tempRange) > 0) { goto exitNow; }
// The piece runs from the end of the previous match up to the start of this match.
594 splitRanges[splitRangesIndex] = NSMakeRange(lastLocation, tempRange.location - lastLocation);
595 lastLocation = NSMaxRange(tempRange);
598 for(capture = 1; capture <= captureCount; capture++) {
599 RKLCDelayedAssert(splitRangesIndex < (splitRangesCapacity - 2), exception, exitNow);
602 if(RKLGetRangeForCapture(icu_regex, status, capture, splitRanges[splitRangesIndex]) > 0) { goto exitNow; }
// A capture whose location is NSNotFound is recorded as an empty range at the location of the base piece.
603 if(splitRanges[splitRangesIndex].location == NSNotFound) { splitRanges[splitRangesIndex] = NSMakeRange(splitRanges[baseMatchIndex].location, 0); }
607 RKLCDelayedAssert(splitRangesIndex < (splitRangesCapacity - 2), exception, exitNow);
// Trailing piece: everything from the end of the last match to the end of setToRange.
608 splitRanges[splitRangesIndex] = NSMakeRange(lastLocation, (NSMaxRange(cacheSlot->setToRange) - cacheSlot->setToRange.location) - lastLocation);
611 CFIndex setToLocation = cacheSlot->setToRange.location;
612 CFStringRef setToString = cacheSlot->setToString;
// One object pointer per collected range.
613 size_t splitStringsSize = (splitRangesIndex * sizeof(id));
614 id *splitStrings = NULL;
616 if((stackUsed + splitStringsSize) < RKL_STACK_LIMIT) { if((splitStrings = alloca(splitStringsSize)) == NULL) { goto exitNow; } stackUsed += splitStringsSize; }
// NOTE(review): the two else branches below look like alternatives selected by preprocessor conditionals that are
// not visible in this excerpt - they differ only in the options argument (NSScannedOption versus 0).
618 else { if((splitStrings = rkl_realloc(&scratchBuffer[1], splitStringsSize, (NSUInteger)NSScannedOption)) == NULL) { goto exitNow; } }
620 // http://sourceforge.net/tracker/index.php?func=detail&aid=2050825&group_id=204582&atid=990188
621 // This is to get around an iPhone quirk. For whatever reason, the iPhone NSZone.h explicitly removes all NSAllocateCollectable()
622 // bits and pieces using #if pre-processor conditions. Since NSScannedOption is only really used when the compiler has -fobjc-gc enabled,
623 // we just chop it out here.
624 else { if((splitStrings = rkl_realloc(&scratchBuffer[1], splitStringsSize, 0)) == NULL) { goto exitNow; } }
628 if(rkl_collectingEnabled() == YES) { // I just don't trust the GC system with the faster CF way of doing things... It never seems to work quite the way you expect it to.
629 for(x = 0; x < splitRangesIndex; x++) { // Optimize the case where the length == 0 by substituting the string @"".
// Ranges are relative to setToRange, so setToLocation is added to address the original string.
630 splitStrings[x] = (splitRanges[x].length == 0) ? @"" : [(id)setToString substringWithRange:NSMakeRange(setToLocation + splitRanges[x].location, splitRanges[x].length)];
632 resultArray = [NSArray arrayWithObjects:splitStrings count:splitRangesIndex];
635 { // This block of code is always compiled in. It is used when not compiled with GC or when compiled with GC but the collector is not enabled.
636 for(x = 0; x < splitRangesIndex; x++) { // Optimize the case where the length == 0 by substituting the string @"".
637 splitStrings[x] = (splitRanges[x].length == 0) ? @"" : (id)CFStringCreateWithSubstring(NULL, setToString, CFMakeRange(setToLocation + splitRanges[x].location, (CFIndex)splitRanges[x].length));
// NOTE(review): transferOwnershipArrayCallBacks is defined elsewhere; presumably the array takes over the reference
// returned by CFStringCreateWithSubstring instead of retaining again - confirm.
639 resultArray = rkl_CFAutorelease(CFArrayCreate(NULL, (const void **)splitStrings, (CFIndex)splitRangesIndex, &transferOwnershipArrayCallBacks)); // Create the CF/NSArray of the split strings.
// Release whatever heap scratch space was used for the ranges and the string pointers.
643 if(scratchBuffer[0] != NULL) { free(scratchBuffer[0]); scratchBuffer[0] = NULL; }
644 if(scratchBuffer[1] != NULL) { free(scratchBuffer[1]); scratchBuffer[1] = NULL; }
649 // IMPORTANT! This code is critical path code. Because of this, it has been written for speed, not clarity.
650 // IMPORTANT! Should only be called from performRegexOp().
// rkl_replaceString: replaces every match of the slot's regex with replacementString (via rkl_replaceAll) and builds
// the resulting string object.
//   cacheSlot            - cache entry holding the compiled regex and the text to search.
//   searchString         - the string being searched; copied as-is when nothing was replaced and it is not mutated in place.
//   searchU16Length      - length of the searched text in UTF16 code units, used for the first buffer size estimate.
//   replacementString    - replacement template handed to ICU as UTF16.
//   replacementU16Length - length of replacementString in UTF16 code units.
//   replacedCountPtr     - out, optional: receives the number of replacements reported by rkl_replaceAll.
//   replaceMutable       - YES when the caller will apply the result to a mutable receiver after this returns.
//   exception / status   - out: error reporting, passed through to rkl_replaceAll.
// Returns resultObject: @"" for an empty result, a copy of searchString when nothing changed (immutable case only),
// otherwise a new string created from the replacement buffer. It is still NULL when a goto exitNow was taken early.
// NOTE(review): the exitNow label is not visible in this excerpt.
653 static NSString *rkl_replaceString(RKLCacheSlot *cacheSlot, id searchString, NSUInteger searchU16Length, NSString *replacementString, NSUInteger replacementU16Length, NSUInteger *replacedCountPtr, int replaceMutable, id *exception, int32_t *status) {
654 int32_t resultU16Length = 0, tempUniCharBufferU16Capacity = 0;
655 UniChar *tempUniCharBuffer = NULL;
656 const UniChar *replacementUniChar = NULL;
657 id resultObject = NULL;
658 NSUInteger replacedCount = 0;
660 // Zero order approximation of the buffer sizes for holding the replaced string or split strings and split strings pointer offsets. As UTF16 code units.
// First guess: 16 + 1.5 times the searched length + twice the replacement length.
661 tempUniCharBufferU16Capacity = (int32_t)(16 + (searchU16Length + (searchU16Length >> 1)) + (replacementU16Length * 2));
663 // Buffer sizes converted from native units to bytes.
664 size_t stackSize = 0, replacementSize = (replacementU16Length * sizeof(UniChar)), tempUniCharBufferSize = (tempUniCharBufferU16Capacity * sizeof(UniChar));
666 // For the various buffers we require, we first try to allocate from the stack if we're not over the RKL_STACK_LIMIT. If we are, switch to using the heap for the buffer.
668 if((stackSize + tempUniCharBufferSize) < RKL_STACK_LIMIT) { if((tempUniCharBuffer = alloca(tempUniCharBufferSize)) == NULL) { goto exitNow; } stackSize += tempUniCharBufferSize; }
669 else { if((tempUniCharBuffer = rkl_realloc(&scratchBuffer[0], tempUniCharBufferSize, 0UL)) == NULL) { goto exitNow; } }
671 // Try to get the pointer to the replacement strings UTF16 data. If we can't, allocate some buffer space, then convert to UTF16.
672 if((replacementUniChar = CFStringGetCharactersPtr((CFStringRef)replacementString)) == NULL) {
673 if((stackSize + replacementSize) < RKL_STACK_LIMIT) { if((replacementUniChar = alloca(replacementSize)) == NULL) { goto exitNow; } stackSize += replacementSize; }
674 else { if((replacementUniChar = rkl_realloc(&scratchBuffer[1], replacementSize, 0UL)) == NULL) { goto exitNow; } }
675 CFStringGetCharacters((CFStringRef)replacementString, CFMakeRange(0, replacementU16Length), (UniChar *)replacementUniChar); // Convert to a UTF16 string.
678 cacheSlot->lastFindRange = cacheSlot->lastMatchRange = NSNotFoundRange; // Clear the cached find information for this regex so a subsequent find works correctly.
680 resultU16Length = rkl_replaceAll(cacheSlot, replacementUniChar, (int32_t)replacementU16Length, tempUniCharBuffer, tempUniCharBufferU16Capacity, &replacedCount, exception, status);
682 if(*status == U_BUFFER_OVERFLOW_ERROR) { // Our buffer guess(es) were too small. Resize the buffers and try again.
// The length returned by the failed attempt, plus 4 code units of slack, becomes the new capacity.
683 tempUniCharBufferSize = ((tempUniCharBufferU16Capacity = resultU16Length + 4) * sizeof(UniChar));
684 if((stackSize + tempUniCharBufferSize) < RKL_STACK_LIMIT) { if((tempUniCharBuffer = alloca(tempUniCharBufferSize)) == NULL) { goto exitNow; } stackSize += tempUniCharBufferSize; }
685 else { if((tempUniCharBuffer = rkl_realloc(&scratchBuffer[0], tempUniCharBufferSize, 0UL)) == NULL) { goto exitNow; } }
687 *status = 0; // Make sure the status var is cleared and try again.
688 resultU16Length = rkl_replaceAll(cacheSlot, replacementUniChar, (int32_t)replacementU16Length, tempUniCharBuffer, tempUniCharBufferU16Capacity, &replacedCount, exception, status);
691 if(*status > 0) { goto exitNow; } // Something went wrong.
693 if(resultU16Length == 0) { resultObject = @""; } // Optimize the case where the replaced text length == 0 with a @"" string.
694 else if(((NSUInteger)resultU16Length == searchU16Length) && (replacedCount == 0)) { // Optimize the case where the replacement == original by creating a copy. Very fast if self is immutable.
695 if(replaceMutable == NO) { resultObject = rkl_CFAutorelease(CFStringCreateCopy(NULL, (CFStringRef)searchString)); } // .. but only if this is not replacing a mutable self.
696 } else { resultObject = rkl_CFAutorelease(CFStringCreateWithCharacters(NULL, tempUniCharBuffer, (CFIndex)resultU16Length)); } // otherwise, create a new string.
698 // If replaceMutable == YES, we don't do the replacement here. We wait until after we return and unlock the cache lock.
699 // This is because we may be trying to mutate an immutable string object.
700 if((replacedCount > 0) && (replaceMutable == YES)) { // We're working on a mutable string and there were successful matches with replaced text, so there's work to do.
// Drop the conversion buffer that was used for this text: the fixed one for texts shorter than RKL_FIXED_LENGTH,
// the dynamic one otherwise (its storage is kept, the second argument is NO).
701 clearBuffer((cacheSlot->setToLength < RKL_FIXED_LENGTH) ? &fixedBuffer : &dynamicBuffer, NO);
702 clearCacheSlotSetTo(cacheSlot); // Flush any cached information about this string since it will mutate.
// Release any heap scratch space and hand the replacement count back to the caller.
706 if(scratchBuffer[0] != NULL) { free(scratchBuffer[0]); scratchBuffer[0] = NULL; }
707 if(scratchBuffer[1] != NULL) { free(scratchBuffer[1]); scratchBuffer[1] = NULL; }
708 if(replacedCountPtr != NULL) { *replacedCountPtr = replacedCount; }
709 return(resultObject);
712 // Modified version of the ICU libraries uregex_replaceAll() that keeps count of the number of replacements made.
//   cacheSlot            - cache entry whose icu_regex already has the text to search set on it.
//   replacementUniChar   - replacement template as UTF16, replacementU16Length code units long.
//   replacedUniChar      - destination buffer, replacedU16Capacity code units large.
//   replacedCount        - out, optional: receives the local counter `replaced`.
//   exception / status   - out: assertion failure report / ICU status code.
// u16Length accumulates the lengths reported by uregex_appendReplacement() and uregex_appendTail().
// NOTE(review): the statement that increments `replaced`, the exitNow label and the return statement are not visible
// in this excerpt.
713 static int32_t rkl_replaceAll(RKLCacheSlot *cacheSlot, const UniChar *replacementUniChar, int32_t replacementU16Length, UniChar *replacedUniChar, int32_t replacedU16Capacity, NSUInteger *replacedCount, id *exception, int32_t *status) {
714 NSUInteger replaced = 0;
715 int32_t u16Length = 0;
716 RKLCDelayedAssert((cacheSlot != NULL) && (replacementUniChar != NULL) && (replacedUniChar != NULL) && (status != NULL), exception, exitNow);
// Start matching from the beginning of the text.
718 uregex_reset(cacheSlot->icu_regex, 0, status);
720 // Work around for ICU uregex_reset() bug, see http://bugs.icu-project.org/trac/ticket/6545
721 // http://sourceforge.net/tracker/index.php?func=detail&aid=2105213&group_id=204582&atid=990188
// For an empty text a status of 8 after the reset is discarded. NOTE(review): 8 is presumably
// U_INDEX_OUTOFBOUNDS_ERROR - confirm against ICU's utypes.h.
722 if((cacheSlot->setToLength == 0) && (*status == 8)) { *status = 0; }
724 while(uregex_findNext(cacheSlot->icu_regex, status)) {
// The destination pointer and remaining capacity are passed by address so ICU can update them between calls.
726 u16Length += uregex_appendReplacement(cacheSlot->icu_regex, replacementUniChar, replacementU16Length, &replacedUniChar, &replacedU16Capacity, status);
// Append whatever text follows the last match.
728 u16Length += uregex_appendTail(cacheSlot->icu_regex, &replacedUniChar, &replacedU16Capacity, status);
// replacedCount is a pointer: the comparison with 0 is a NULL check.
730 if(replacedCount != 0) { *replacedCount = replaced; }
// Empties every cache the library keeps: the scratch buffers, each regex cache slot and both conversion buffers.
// The assertion requires cacheSpinLock to be non-zero, so callers have to take the lock before calling this.
static void rkl_clearStringCache(void) {
  NSUInteger scratchIndex = 0, slotIndex = 0;

  NSCParameterAssert(cacheSpinLock != 0);
  lastCacheSlot = NULL;

  // Give back any heap scratch space that is still allocated.
  while(scratchIndex < SCRATCH_BUFFERS) {
    if(scratchBuffer[scratchIndex] != NULL) {
      free(scratchBuffer[scratchIndex]);
      scratchBuffer[scratchIndex] = NULL;
    }
    scratchIndex++;
  }

  // Reset each cache slot: first the compiled regex, then the text it was last set to.
  while(slotIndex < RKL_CACHE_SIZE) {
    RKLCacheSlot *slot = &RKLCache[slotIndex];
    clearCacheSlotRegex(slot);
    clearCacheSlotSetTo(slot);
    slotIndex++;
  }

  // Only the dynamic buffer owns storage that gets freed here.
  clearBuffer(&fixedBuffer, NO);
  clearBuffer(&dynamicBuffer, YES);
}
// clearBuffer: drops the string cached in a conversion buffer and, on request, the dynamic buffer's heap storage.
//   buffer            - buffer to clear; NULL is ignored.
//   freeDynamicBuffer - when YES and buffer is &dynamicBuffer, its uniChar storage is free()d as well.
// NOTE(review): lines following the last statement shown here are not visible in this excerpt.
745 static void clearBuffer(RKLBuffer *buffer, int freeDynamicBuffer) {
746 if(buffer == NULL) { return; }
// The storage is only ever freed for the dynamic buffer, whatever buffer was passed in.
747 if((freeDynamicBuffer == YES) && (buffer->uniChar != NULL) && (buffer == &dynamicBuffer)) { free(dynamicBuffer.uniChar); dynamicBuffer.uniChar = NULL; }
// Give up the reference to the string this buffer was holding.
748 if(buffer->string != NULL) { CFRelease(buffer->string); buffer->string = NULL; }
// Releases everything a cache slot holds for its compiled regular expression: the pattern string, the ICU regex
// object and, if a text is still attached to the slot, the cached information about that text. NULL is ignored.
static void clearCacheSlotRegex(RKLCacheSlot *cacheSlot) {
  if(cacheSlot != NULL) {
    if(cacheSlot->regexString != NULL) {
      CFRelease(cacheSlot->regexString);
      cacheSlot->regexString = NULL;
      cacheSlot->options     = 0U;
    }

    if(cacheSlot->icu_regex != NULL) {
      uregex_close(cacheSlot->icu_regex);
      cacheSlot->icu_regex    = NULL;
      cacheSlot->captureCount = -1L; // No regex, no known capture count.
    }

    if(cacheSlot->setToString != NULL) {
      clearCacheSlotSetTo(cacheSlot);
    }
  }
}
// Forgets the text a cache slot was last set to. The ICU regex (if any) is pointed at an empty string before the
// slot's reference to the text is released, then all bookkeeping about the text is reset. NULL is ignored.
static void clearCacheSlotSetTo(RKLCacheSlot *cacheSlot) {
  if(cacheSlot == NULL) { return; }

  if(cacheSlot->icu_regex != NULL) {
    int32_t ignoredStatus = 0; // The outcome of detaching the text is deliberately not inspected.
    uregex_setText(cacheSlot->icu_regex, &emptyUniCharString[0], 0, &ignoredStatus);
  }

  if(cacheSlot->setToString != NULL) {
    CFRelease(cacheSlot->setToString);
    cacheSlot->setToString = NULL;
  }

  cacheSlot->setToLength          = 0L;
  cacheSlot->setToHash            = 0UL;
  cacheSlot->setToNeedsConversion = 0UL;
  cacheSlot->setToIsImmutable     = 0UL;
  cacheSlot->setToRange           = NSNotFoundRange;
  cacheSlot->lastMatchRange       = NSNotFoundRange;
  cacheSlot->lastFindRange        = NSNotFoundRange;
  cacheSlot->setToUniChar         = NULL;
}
771 // Helps to keep things tidy.
// addKeyAndObject(objs, keys, i, k, o): stores object o in objs[i] and key k in keys[i], then increments i.
// The pair is skipped entirely when either the key or the object is NULL. Both k and o are evaluated exactly once
// (they are copied into temporaries inside a GCC statement expression). Note the argument order: key first, object second.
772 #define addKeyAndObject(objs, keys, i, k, o) ({id _o=(o), _k=(k); if((_o != NULL) && (_k != NULL)) { objs[i] = _o; keys[i] = _k; i++; } })
// Builds the userInfo dictionary attached to the NSError / NSException objects created for ICU regex failures.
//   regexString - the regular expression involved; when NULL no dictionary is built and NULL is returned.
//   options     - the regex options, stored under RKLICURegexRegexOptionsErrorKey.
//   parseError  - optional ICU parse error; when present and its line is not -1, the line, offset and the text
//                 before / after the error position are added and used for the failure reason.
//   status      - ICU status code, stored both as a number and as its u_errorName() string.
//   ...         - optional extra entries as (object, key) pairs, terminated by a single NULL.
// Returns an autoreleased dictionary holding at most 62 entries.
static NSDictionary *userInfoDictionary(NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status, ...) {
  // Checked before va_start() so that every va_start() is matched by a va_end().
  if(regexString == NULL) { return(NULL); }

  va_list varArgsList;
  va_start(varArgsList, status);

  id objects[64], keys[64];
  NSUInteger count = 0;

  NSString *errorNameString = [NSString stringWithUTF8String:u_errorName(status)];

  addKeyAndObject(objects, keys, count, RKLICURegexRegexErrorKey,        regexString);
  addKeyAndObject(objects, keys, count, RKLICURegexRegexOptionsErrorKey, [NSNumber numberWithUnsignedInt:options]);
  addKeyAndObject(objects, keys, count, RKLICURegexErrorCodeErrorKey,    [NSNumber numberWithInt:status]);
  addKeyAndObject(objects, keys, count, RKLICURegexErrorNameErrorKey,    errorNameString);

  if((parseError != NULL) && (parseError->line != -1)) {
    NSString *preContextString  = [NSString stringWithCharacters:&parseError->preContext[0]  length:(NSUInteger)u_strlen(&parseError->preContext[0])];
    NSString *postContextString = [NSString stringWithCharacters:&parseError->postContext[0] length:(NSUInteger)u_strlen(&parseError->postContext[0])];

    addKeyAndObject(objects, keys, count, RKLICURegexLineErrorKey,        [NSNumber numberWithInt:parseError->line]);
    addKeyAndObject(objects, keys, count, RKLICURegexOffsetErrorKey,      [NSNumber numberWithInt:parseError->offset]);
    addKeyAndObject(objects, keys, count, RKLICURegexPreContextErrorKey,  preContextString);
    addKeyAndObject(objects, keys, count, RKLICURegexPostContextErrorKey, postContextString);
    addKeyAndObject(objects, keys, count, @"NSLocalizedFailureReason",    ([NSString stringWithFormat:@"The error %@ occurred at line %d, column %d: %@<<HERE>>%@", errorNameString, parseError->line, parseError->offset, preContextString, postContextString]));
  } else {
    addKeyAndObject(objects, keys, count, @"NSLocalizedFailureReason",    ([NSString stringWithFormat:@"The error %@ occurred.", errorNameString]));
  }

  // Extra (object, key) pairs from the caller. The list ends with a single NULL, so the key must not be fetched
  // once a NULL object was seen: calling va_arg() with no argument left is undefined behaviour.
  // The limit of 62 keeps the fixed size arrays above from overflowing.
  while(count < 62) {
    id obj = va_arg(varArgsList, id);
    if(obj == NULL) { break; }
    id key = va_arg(varArgsList, id);
    if(key == NULL) { break; }
    addKeyAndObject(objects, keys, count, key, obj);
  }
  va_end(varArgsList);

  return([NSDictionary dictionaryWithObjects:&objects[0] forKeys:&keys[0] count:count]);
}
// Wraps an ICU regex compile failure in an NSError. The error lives in RKLICURegexErrorDomain, uses the ICU status
// as its code and carries the dictionary built by userInfoDictionary() plus a localized description.
static NSError *RKLNSErrorForRegex(NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status) {
  NSDictionary *errorInfo = userInfoDictionary(regexString, options, parseError, status, @"There was an error compiling the regular expression.", @"NSLocalizedDescription", NULL);
  NSError      *error     = [NSError errorWithDomain:RKLICURegexErrorDomain code:(NSInteger)status userInfo:errorInfo];

  return(error);
}
// Wraps an ICU regex failure in an NSException named RKLICURegexException. The reason names the numeric ICU status
// and its symbolic name; the userInfo is the dictionary built by userInfoDictionary() without any extra entries.
static NSException *RKLNSExceptionForRegex(NSString *regexString, RKLRegexOptions options, const UParseError *parseError, int status) {
  NSString     *reasonString = [NSString stringWithFormat:@"ICU regular expression error #%d, %s", status, u_errorName(status)];
  NSDictionary *errorInfo    = userInfoDictionary(regexString, options, parseError, status, NULL);

  return([NSException exceptionWithName:RKLICURegexException reason:reasonString userInfo:errorInfo]);
}
// Packages the details of a failed internal assertion into a dictionary, so the failure can be reported later.
//   function, file, line - where the assertion failed (C strings are converted as UTF8).
//   format, ...          - printf-style description of the failure.
// The dictionary has the keys @"description", @"function", @"file", @"line" and @"exceptionName".
static NSDictionary *RKLCAssertDictionary(const char *function, const char *file, int line, NSString *format, ...) {
  va_list formatArguments;
  va_start(formatArguments, format);
  NSString *descriptionString = [[[NSString alloc] initWithFormat:format arguments:formatArguments] autorelease];
  va_end(formatArguments);

  NSString *functionName = [NSString stringWithUTF8String:function];
  NSString *fileName     = [NSString stringWithUTF8String:file];
  NSNumber *lineNumber   = [NSNumber numberWithInt:line];

  NSDictionary *assertInfo = [NSDictionary dictionaryWithObjectsAndKeys:
                               descriptionString,                @"description",
                               functionName,                     @"function",
                               fileName,                         @"file",
                               lineNumber,                       @"line",
                               NSInternalInconsistencyException, @"exceptionName",
                               NULL];
  return(assertInfo);
}
// Formats a message the way Cocoa reports method level problems: "*** -[Class selector]: message".
//   object      - the receiver; when it is a class object itself the prefix is '+', otherwise '-'.
//   selector    - the method the message is about.
//   format, ... - printf-style message text.
static NSString *RKLStringFromClassAndMethod(id object, SEL selector, NSString *format, ...) {
  va_list formatArguments;
  va_start(formatArguments, format);
  NSString *messageString = [[[NSString alloc] initWithFormat:format arguments:formatArguments] autorelease];
  va_end(formatArguments);

  Class objectsClass = [object class];

  // Sending -class to a class object returns the class itself, which tells class and instance receivers apart.
  char methodKind = '-';
  if(object == objectsClass) { methodKind = '+'; }

  NSString *className    = NSStringFromClass(objectsClass);
  NSString *selectorName = NSStringFromSelector(selector);

  return([NSString stringWithFormat:@"*** %c[%@ %@]: %@", methodKind, className, selectorName, messageString]);
}
// Regular expression conveniences for NSString. Every instance method funnels into performRegexOp(); the shorter
// variants search the whole string (NSMaxiumRange) with no options and no error reporting. Each method passes its
// own _cmd, so exception messages name the method that was actually called.
@implementation NSString (RegexKitLiteAdditions)

// clearStringCache

// Empties the shared regex / string cache. The cache is only touched while cacheSpinLock is held.
+ (void)RKL_METHOD_PREPEND(clearStringCache)
{
  OSSpinLockLock(&cacheSpinLock);
  rkl_clearStringCache();
  OSSpinLockUnlock(&cacheSpinLock);
}

// captureCountForRegex:

// Number of capture groups in regex, compiled without options. No NSError is reported.
+ (NSInteger)RKL_METHOD_PREPEND(captureCountForRegex):(NSString *)regex
{
  NSInteger groupCount = [self RKL_METHOD_PREPEND(captureCountForRegex):regex options:RKLNoOptions error:NULL];
  return(groupCount);
}

// Number of capture groups in regex, or -1 when the regex could not be obtained from the cache.
// A NULL regex raises NSInvalidArgumentException; an exception produced while looking up the regex is raised
// only after the cache lock has been released.
+ (NSInteger)RKL_METHOD_PREPEND(captureCountForRegex):(NSString *)regex options:(RKLRegexOptions)options error:(NSError **)error
{
  if((error != NULL) && (*error != NULL)) { *error = NULL; }
  if(regex == NULL) { RKLRaiseException(NSInvalidArgumentException, @"The regular expression argument is NULL."); }

  NSException *pendingException = NULL;
  NSInteger    groupCount       = -1;

  OSSpinLockLock(&cacheSpinLock);
  RKLCacheSlot *slot = getCachedRegex(regex, options, error, &pendingException);
  if(slot != NULL) { groupCount = slot->captureCount; }
  OSSpinLockUnlock(&cacheSpinLock);

  if(pendingException != NULL) { [pendingException raise]; }
  return(groupCount);
}

// componentsSeparatedByRegex:

// Splits the whole receiver around the matches of regex.
- (NSArray *)RKL_METHOD_PREPEND(componentsSeparatedByRegex):(NSString *)regex
{
  NSRange  entireString = NSMaxiumRange;
  NSArray *components   = performRegexOp(self, _cmd, (RKLRegexOp)RKLSplitOp, regex, 0, 0L, self, &entireString, NULL, NULL, NULL);
  return(components);
}

// Splits the given range of the receiver around the matches of regex.
- (NSArray *)RKL_METHOD_PREPEND(componentsSeparatedByRegex):(NSString *)regex range:(NSRange)range
{
  NSArray *components = performRegexOp(self, _cmd, (RKLRegexOp)RKLSplitOp, regex, 0, 0L, self, &range, NULL, NULL, NULL);
  return(components);
}

// Splits the given range of the receiver around the matches of regex, with regex options and error reporting.
- (NSArray *)RKL_METHOD_PREPEND(componentsSeparatedByRegex):(NSString *)regex options:(RKLRegexOptions)options range:(NSRange)range error:(NSError **)error
{
  NSArray *components = performRegexOp(self, _cmd, (RKLRegexOp)RKLSplitOp, regex, options, 0L, self, &range, NULL, error, NULL);
  return(components);
}

// isMatchedByRegex:

// YES when regex matches somewhere in the receiver.
- (BOOL)RKL_METHOD_PREPEND(isMatchedByRegex):(NSString *)regex
{
  NSRange entireString = NSMaxiumRange, matchRange = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, 0L, self, &entireString, NULL, NULL, (void **)((void *)&matchRange));
  return(matchRange.location != NSNotFound);
}

// YES when regex matches inside range.
- (BOOL)RKL_METHOD_PREPEND(isMatchedByRegex):(NSString *)regex inRange:(NSRange)range
{
  NSRange matchRange = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, 0L, self, &range, NULL, NULL, (void **)((void *)&matchRange));
  return(matchRange.location != NSNotFound);
}

// YES when regex matches inside range, with regex options and error reporting.
- (BOOL)RKL_METHOD_PREPEND(isMatchedByRegex):(NSString *)regex options:(RKLRegexOptions)options inRange:(NSRange)range error:(NSError **)error
{
  NSRange matchRange = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, options, 0L, self, &range, NULL, error, (void **)((void *)&matchRange));
  return(matchRange.location != NSNotFound);
}

// rangeOfRegex:

// Range of the first match of regex in the receiver; the range stays NSNotFoundRange unless performRegexOp() stores a match.
- (NSRange)RKL_METHOD_PREPEND(rangeOfRegex):(NSString *)regex
{
  NSRange entireString = NSMaxiumRange, matchRange = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, 0L, self, &entireString, NULL, NULL, (void **)((void *)&matchRange));
  return(matchRange);
}

// Range of the given capture group of the first match of regex in the receiver.
- (NSRange)RKL_METHOD_PREPEND(rangeOfRegex):(NSString *)regex capture:(NSInteger)capture
{
  NSRange entireString = NSMaxiumRange, matchRange = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, capture, self, &entireString, NULL, NULL, (void **)((void *)&matchRange));
  return(matchRange);
}

// Range of the first match of regex inside range.
- (NSRange)RKL_METHOD_PREPEND(rangeOfRegex):(NSString *)regex inRange:(NSRange)range
{
  NSRange matchRange = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, 0, 0L, self, &range, NULL, NULL, (void **)((void *)&matchRange));
  return(matchRange);
}

// Range of the given capture group of the first match inside range, with regex options and error reporting.
- (NSRange)RKL_METHOD_PREPEND(rangeOfRegex):(NSString *)regex options:(RKLRegexOptions)options inRange:(NSRange)range capture:(NSInteger)capture error:(NSError **)error
{
  NSRange matchRange = NSNotFoundRange;
  performRegexOp(self, _cmd, (RKLRegexOp)RKLRangeOp, regex, options, capture, self, &range, NULL, error, (void **)((void *)&matchRange));
  return(matchRange);
}

// stringByMatching:

// Text of the first match of regex in the receiver, or NULL when there is none.
- (NSString *)RKL_METHOD_PREPEND(stringByMatching):(NSString *)regex
{
  NSString *matchedString = [self RKL_METHOD_PREPEND(stringByMatching):regex options:RKLNoOptions inRange:NSMaxiumRange capture:0L error:NULL];
  return(matchedString);
}

// Text of the given capture group of the first match, or NULL when there is none.
- (NSString *)RKL_METHOD_PREPEND(stringByMatching):(NSString *)regex capture:(NSInteger)capture
{
  NSString *matchedString = [self RKL_METHOD_PREPEND(stringByMatching):regex options:RKLNoOptions inRange:NSMaxiumRange capture:capture error:NULL];
  return(matchedString);
}

// Text of the first match inside range, or NULL when there is none.
- (NSString *)RKL_METHOD_PREPEND(stringByMatching):(NSString *)regex inRange:(NSRange)range
{
  NSString *matchedString = [self RKL_METHOD_PREPEND(stringByMatching):regex options:RKLNoOptions inRange:range capture:0L error:NULL];
  return(matchedString);
}

// Text of the given capture group of the first match inside range. The range is located with rangeOfRegex:...
// and the substring is then created from the receiver; NULL is returned when nothing was found.
- (NSString *)RKL_METHOD_PREPEND(stringByMatching):(NSString *)regex options:(RKLRegexOptions)options inRange:(NSRange)range capture:(NSInteger)capture error:(NSError **)error
{
  NSRange matchedRange = [self RKL_METHOD_PREPEND(rangeOfRegex):regex options:options inRange:range capture:capture error:error];
  if(matchedRange.location == NSNotFound) { return(NULL); }
  return(rkl_CFAutorelease(CFStringCreateWithSubstring(NULL, (CFStringRef)self, CFMakeRange(matchedRange.location, matchedRange.length))));
}

// stringByReplacingOccurrencesOfRegex:

// New string in which every match of regex in the receiver is replaced by replacement.
- (NSString *)RKL_METHOD_PREPEND(stringByReplacingOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement
{
  NSRange   entireString   = NSMaxiumRange;
  NSString *replacedString = performRegexOp(self, _cmd, (RKLRegexOp)RKLReplaceOp, regex, 0, 0L, self, &entireString, replacement, NULL, NULL);
  return(replacedString);
}

// Same, limited to searchRange.
- (NSString *)RKL_METHOD_PREPEND(stringByReplacingOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement range:(NSRange)searchRange
{
  NSString *replacedString = performRegexOp(self, _cmd, (RKLRegexOp)RKLReplaceOp, regex, 0, 0L, self, &searchRange, replacement, NULL, NULL);
  return(replacedString);
}

// Same, limited to searchRange, with regex options and error reporting.
- (NSString *)RKL_METHOD_PREPEND(stringByReplacingOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement options:(RKLRegexOptions)options range:(NSRange)searchRange error:(NSError **)error
{
  NSString *replacedString = performRegexOp(self, _cmd, (RKLRegexOp)RKLReplaceOp, regex, options, 0L, self, &searchRange, replacement, error, NULL);
  return(replacedString);
}

@end
// In-place regex replacement for NSMutableString. All three methods call performRegexOp() with the
// RKLReplaceOp | RKLReplaceMutable operation and hand it the address of a local counter, which they return.
986 @implementation NSMutableString (RegexKitLiteAdditions)
988 // replaceOccurrencesOfRegex:
// Replaces the matches of regex in the whole receiver (NSMaxiumRange) with replacement; no options, no error reporting.
// Returns replacedCount, which stays 0 unless performRegexOp() stores a value through the last argument.
990 - (NSUInteger)RKL_METHOD_PREPEND(replaceOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement
992 NSRange searchRange = NSMaxiumRange;
993 NSUInteger replacedCount = 0;
994 performRegexOp(self, _cmd, (RKLRegexOp)(RKLReplaceOp | RKLReplaceMutable), regex, 0, 0L, self, &searchRange, replacement, NULL, (void **)((void *)&replacedCount));
995 return(replacedCount);
// Same as above, limited to searchRange.
998 - (NSUInteger)RKL_METHOD_PREPEND(replaceOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement range:(NSRange)searchRange
1000 NSUInteger replacedCount = 0;
1001 performRegexOp(self, _cmd, (RKLRegexOp)(RKLReplaceOp | RKLReplaceMutable), regex, 0, 0L, self, &searchRange, replacement, NULL, (void **)((void *)&replacedCount));
1002 return(replacedCount);
// Same as above, limited to searchRange, passing the caller's regex options and NSError pointer through.
1005 - (NSUInteger)RKL_METHOD_PREPEND(replaceOccurrencesOfRegex):(NSString *)regex withString:(NSString *)replacement options:(RKLRegexOptions)options range:(NSRange)searchRange error:(NSError **)error
1007 NSUInteger replacedCount = 0;
1008 performRegexOp(self, _cmd, (RKLRegexOp)(RKLReplaceOp | RKLReplaceMutable), regex, options, 0L, self, &searchRange, replacement, error, (void **)((void *)&replacedCount));
1009 return(replacedCount);