/* character iterator */ #define CHARACTER_ITERATOR_WITH(a_iterator) \ unsigned int UTF8_total_count_for_character = 0; \ CHARACTER_WITH character_position = 0;/*FIXME=position*/ CODEPOINT_ITERATOR_WITH(a_iterator) /* well, technically not... */ #define CHARACTER_ITERATOR_END_P CODEPOINT_ITERATOR_END_P #define CHARACTER_COLLECT_CODEPOINTS(body) \ if(CHARACTER_CODEPOINT_COUNT == 0 && !Unicode_is_combining_suffix(codepoint)) { \ character_position = position; \ CHARACTER_ADD_CODEPOINT(codepoint) \ if(codepoint < 32) { /* handle control characters immediately */ \ {body} CHARACTER_CLEAR; \ } \ } else if (CHARACTER_CODEPOINT_COUNT > 0 && Unicode_is_combining_suffix(codepoint)) { \ CHARACTER_ADD_CODEPOINT(codepoint) \ } else { \ {body} CHARACTER_CLEAR; \ CHARACTER_ADD_CODEPOINT(codepoint) \ } /* FIXME this has a design flaw: if the buffer looks like this: codepoint [gap] combining-codepoint it will (and can) not detect it. The entire loop construct of BUFFER_ITERATOR_WITH is not suited to do so. Even calling #buffer_iterator_fast_get one more time will not help since it will just return the item you already had (and you don't want to advance since maybe you want to give this entire character to another proc for working on it). */ #define CHARACTER_ITERATOR_NEXT(a_iterator) \ data += UTF8_buffer_count, \ count -= UTF8_buffer_count, \ position += UTF8_buffer_count, \ UTF8_total_count_for_character += UTF8_buffer_count buffer_iterator_skip(a_iterator, UTF8_total_count_for_character), \ #define CHARACTER_ITERATOR_DECODE(body) \ CODEPOINT_ITERATOR_DECODE; \ CHARACTER_COLLECT_CODEPOINTS(body) #define CHARACTER_ITERATOR_FOREACH(a_iterator, body) \ CHARACTER_ITERATOR_WITH(a_iterator) { \ for(; !CHARACTER_ITERATOR_END_P; CHARACTER_ITERATOR_NEXT(a_iterator)) { \ CHARACTER_ITERATOR_DECODE(body) \ } \ } \ if(CHARACTER_CODEPOINT_COUNT > 0) { { body } CHARACTER_CLEAR; } /* simpler: */ static inline codepoint_t buffer_codepoint_at(struct Buffer* buffer, unsigned int position, unsigned int* count) { codepoint_t result; unsigned int requested_count; buffer_item_t* data; struct BufferIterator* iterator; result = buffer_item_at(buffer, position); requested_count = UTF8_decode_first(result); if(requested_count > 1) { iterator = buffer_iterate(buffer, position, position + requested_count); *count = buffer_iterator_fast_get(iterator, &data, requested_count); assert(*count == requested_count); return UTF8_decode(data); } else { *count = 1; return result; } } #define CHARACTER_FOREACH(buffer, beginning_index, end_index, body) \ unsigned int position; \ codepoint_t codepoint; \ position = beginning_index; \ CHARACTER_WITH { \ while(position < end_index) { \ codepoint = buffer_codepoint_at(buffer, position, &count); \ CHARACTER_COLLECT_CODEPOINTS(body) \ position += count; \ } \ if(CHARACTER_CODEPOINT_COUNT > 0) { { body } CHARACTER_CLEAR; } \ } /* end character iterator */