123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412 |
- //
- // HTMLNode.m
- // StackOverflow
- //
- // Created by Ben Reeves on 09/03/2010.
- // Copyright 2010 Ben Reeves. All rights reserved.
- //
- #import "HTMLNode.h"
- #import <libxml/HTMLtree.h>
- @implementation HTMLNode
// Returns a newly allocated wrapper around the parent of the wrapped node.
// The parent pointer is handed to the wrapper unchecked, so when the node has
// no parent the returned object wraps a NULL xmlNode.
- (HTMLNode *)parent {
    xmlNode *parentNode = _node->parent;
    HTMLNode *wrapper = [[HTMLNode alloc] initWithXMLNode:parentNode];
    return wrapper;
}
// Returns a newly allocated wrapper around the node that follows this one
// (the `next` link). The link is not checked: with no following sibling the
// returned object wraps a NULL xmlNode.
- (HTMLNode *)nextSibling {
    xmlNode *following = _node->next;
    HTMLNode *wrapper = [[HTMLNode alloc] initWithXMLNode:following];
    return wrapper;
}
// Returns a newly allocated wrapper around the node that precedes this one
// (the `prev` link). The link is not checked: with no preceding sibling the
// returned object wraps a NULL xmlNode.
- (HTMLNode *)previousSibling {
    xmlNode *preceding = _node->prev;
    HTMLNode *wrapper = [[HTMLNode alloc] initWithXMLNode:preceding];
    return wrapper;
}
// Replaces the value of an existing attribute on `node`.
//
//   node    - element whose attribute list (node->properties) is searched.
//   nameStr - attribute name; compared with strcmp, so the match is exact.
//   value   - new NUL-terminated value. It is copied; the caller keeps ownership.
//
// Only the first attribute whose name matches is touched, and only the content
// of that attribute's first child node is replaced. Nothing happens when an
// argument is NULL, when the attribute is absent, or when it has no child node.
void setAttributeNamed(xmlNode * node, const char * nameStr, const char * value) {
    if (node == NULL || nameStr == NULL || value == NULL) {
        return;
    }

    for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) {
        if (attr->name == NULL || strcmp((const char *)attr->name, nameStr) != 0) {
            continue;
        }

        xmlNode *valueNode = attr->children;
        if (valueNode != NULL) {
            // Go through libxml2's own setter: it releases the old content with
            // the allocator that owns it and duplicates `value` itself, so no
            // copy is made (or leaked) unless there is a node to store it in.
            xmlNodeSetContent(valueNode, (const xmlChar *)value);
        }
        return;
    }
}
// Looks up an attribute on `node` and returns its value as an NSString.
//
//   node    - element whose attribute list (node->properties) is searched.
//   nameStr - attribute name; compared with strcmp, so the match is exact.
//
// Returns the content of the first child node of the first attribute whose
// name matches, decoded as UTF-8. Returns nil when an argument is NULL, when
// the attribute is absent, or when it carries no content.
NSString * getAttributeNamed(xmlNode * node, const char * nameStr)
{
    if (node == NULL || nameStr == NULL) {
        return nil;
    }

    for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) {
        if (attr->name == NULL || strcmp((const char *)attr->name, nameStr) != 0) {
            continue;
        }

        // Only the first matching attribute is consulted.
        xmlNode *valueNode = attr->children;
        if (valueNode == NULL || valueNode->content == NULL) {
            // A NULL C string must never reach +stringWithCString:encoding:.
            return nil;
        }
        return [NSString stringWithCString:(const char *)valueNode->content
                                  encoding:NSUTF8StringEncoding];
    }

    return nil;
}
// Returns the value of the attribute called `name` on the wrapped node.
// The name is converted to a UTF-8 C string and the lookup itself is done by
// the getAttributeNamed() C function.
- (NSString *)getAttributeNamed:(NSString *)name {
    return getAttributeNamed(_node, [name UTF8String]);
}
// Returns the value of the node's "class" attribute, as reported by
// -getAttributeNamed:.
- (NSString *)className {
    NSString *classAttribute = [self getAttributeNamed:@"class"];
    return classAttribute;
}
// Returns the tag name of the wrapped node decoded as UTF-8, or nil when the
// wrapper holds no node or the node has no name.
- (NSString *)tagName {
    if (_node == NULL || _node->name == NULL) {
        // A NULL C string must never reach +stringWithCString:encoding:.
        return nil;
    }
    return [NSString stringWithCString:(const char *)_node->name
                              encoding:NSUTF8StringEncoding];
}
// Returns a newly allocated wrapper around the first child of the wrapped
// node. The child pointer is not checked: for a childless node the returned
// object wraps a NULL xmlNode.
- (HTMLNode *)firstChild {
    xmlNode *first = _node->children;
    HTMLNode *wrapper = [[HTMLNode alloc] initWithXMLNode:first];
    return wrapper;
}
// Collects every node whose attribute `attribute` matches `className`.
//
//   attribute - attribute name to look for (exact strcmp match).
//   className - value to match against the attribute's content.
//   node      - first node to inspect; its following siblings and all of
//               their descendants are visited as well.
//   array     - receives one new HTMLNode wrapper per matching node. A node is
//               added before its own descendants are searched.
//   partial   - YES: the content only has to contain `className` (strstr);
//               NO: the content must equal `className` (strcmp).
//
// For each node only the first attribute with the requested name is examined.
// Nothing is collected when `attribute` or `className` is NULL.
- (void)findChildrenWithAttribute:(const char *)attribute matchingName:(const char *)className inXMLNode:(xmlNode *)node inArray:(NSMutableArray *)array allowPartial:(BOOL)partial {
    if (attribute == NULL || className == NULL) {
        return;
    }

    for (xmlNode *cur_node = node; cur_node != NULL; cur_node = cur_node->next) {
        for (xmlAttrPtr attr = cur_node->properties; attr != NULL; attr = attr->next) {
            if (attr->name == NULL || strcmp((const char *)attr->name, attribute) != 0) {
                continue;
            }

            for (xmlNode *child = attr->children; child != NULL; child = child->next) {
                const char *value = (const char *)child->content;
                if (value == NULL) {
                    // Nothing to compare against; strcmp/strstr need a string.
                    continue;
                }

                BOOL match = partial ? (strstr(value, className) != NULL)
                                     : (strcmp(value, className) == 0);
                if (match) {
                    [array addObject:[[HTMLNode alloc] initWithXMLNode:cur_node]];
                    break;  // one entry per node is enough
                }
            }
            break;  // only the first attribute with this name is considered
        }

        [self findChildrenWithAttribute:attribute
                           matchingName:className
                              inXMLNode:cur_node->children
                                inArray:array
                           allowPartial:partial];
    }
}
// Appends to `array` a new wrapper for every node named `tagName`, looking at
// `node`, its following siblings and, recursively, all of their descendants.
// A node is added before its own descendants are searched. Does nothing when
// `tagName` yields no UTF-8 C string.
- (void)findChildTags:(NSString *)tagName inXMLNode:(xmlNode *)node inArray:(NSMutableArray *)array {
    const char *wanted = [tagName UTF8String];
    if (!wanted) {
        return;
    }

    xmlNode *current = node;
    while (current) {
        BOOL sameName = current->name && strcmp((char *)current->name, wanted) == 0;
        if (sameName) {
            HTMLNode *wrapper = [[HTMLNode alloc] initWithXMLNode:current];
            [array addObject:wrapper];
        }

        // Descend before moving on to the next sibling.
        [self findChildTags:tagName inXMLNode:current->children inArray:array];
        current = current->next;
    }
}
// Returns wrappers for all descendants of this node named `tagName`.
// The search starts at the first child and is carried out by
// -findChildTags:inXMLNode:inArray:; the result is empty when nothing matches.
- (NSArray *)findChildTags:(NSString *)tagName {
    NSMutableArray *matches = [NSMutableArray array];
    xmlNode *firstChild = _node->children;
    [self findChildTags:tagName inXMLNode:firstChild inArray:matches];
    return matches;
}
// Returns a wrapper for the first node named `tagName`, or nil if there is none.
//
//   tagName - tag name to look for (exact strcmp match on the node name).
//   node    - first node to inspect; for each node in the sibling chain the
//             node itself is tested first, then its descendants, then the
//             next sibling.
//
// Returns nil when `tagName` yields no UTF-8 C string, matching the guard in
// -findChildTags:inXMLNode:inArray:.
- (HTMLNode *)findChildTag:(NSString *)tagName inXMLNode:(xmlNode *)node {
    const char *tagNameStr = [tagName UTF8String];
    if (tagNameStr == NULL) {
        // strcmp below must not be given a NULL string.
        return nil;
    }

    for (xmlNode *cur_node = node; cur_node != NULL; cur_node = cur_node->next) {
        if (cur_node->name != NULL && strcmp((const char *)cur_node->name, tagNameStr) == 0) {
            return [[HTMLNode alloc] initWithXMLNode:cur_node];
        }

        HTMLNode *descendant = [self findChildTag:tagName inXMLNode:cur_node->children];
        if (descendant != nil) {
            return descendant;
        }
    }

    return nil;
}
// Returns the first descendant of this node named `tagName`, as found by
// -findChildTag:inXMLNode: starting from the first child.
- (HTMLNode *)findChildTag:(NSString *)tagName {
    xmlNode *firstChild = _node->children;
    HTMLNode *found = [self findChildTag:tagName inXMLNode:firstChild];
    return found;
}
// Returns one new wrapper per direct child of the wrapped node, in sibling
// order. The array is empty when the node has no children.
- (NSArray *)children {
    NSMutableArray *wrappers = [NSMutableArray array];

    xmlNode *child = _node->children;
    while (child) {
        [wrappers addObject:[[HTMLNode alloc] initWithXMLNode:child]];
        child = child->next;
    }

    return wrappers;
}
- /*
- -(NSString*)description
- {
- NSString * string = [NSString stringWithFormat:@"<%s>%@\n", _node->name, [self contents]];
-
- for (HTMLNode * child in [self children])
- {
- string = [string stringByAppendingString:[child description]];
- }
-
- string = [string stringByAppendingString:[NSString stringWithFormat:@"<%s>\n", _node->name]];
- return string;
- }*/
// Returns a wrapper for the first node whose attribute `attribute` matches
// `name`, or nil if there is none.
//
//   attribute - attribute name to look for (exact strcmp match).
//   name      - value to match against the attribute's content.
//   node      - first node to inspect; for each node in the sibling chain the
//               node itself is tested first, then its descendants, then the
//               next sibling.
//   partial   - YES: the content only has to contain `name` (strstr);
//               NO: the content must equal `name` (strcmp).
//
// For each node only the first attribute with the requested name is examined.
// Returns nil when `node`, `attribute` or `name` is NULL.
- (HTMLNode *)findChildWithAttribute:(const char *)attribute matchingName:(const char *)name inXMLNode:(xmlNode *)node allowPartial:(BOOL)partial {
    if (node == NULL || attribute == NULL || name == NULL) {
        return nil;
    }

    for (xmlNode *cur_node = node; cur_node != NULL; cur_node = cur_node->next) {
        for (xmlAttrPtr attr = cur_node->properties; attr != NULL; attr = attr->next) {
            if (attr->name == NULL || strcmp((const char *)attr->name, attribute) != 0) {
                continue;
            }

            for (xmlNode *child = attr->children; child != NULL; child = child->next) {
                const char *value = (const char *)child->content;
                if (value == NULL) {
                    // Nothing to compare against; strcmp/strstr need a string.
                    continue;
                }

                BOOL match = partial ? (strstr(value, name) != NULL)
                                     : (strcmp(value, name) == 0);
                if (match) {
                    return [[HTMLNode alloc] initWithXMLNode:cur_node];
                }
            }
            break;  // only the first attribute with this name is considered
        }

        HTMLNode *descendant = [self findChildWithAttribute:attribute
                                               matchingName:name
                                                  inXMLNode:cur_node->children
                                               allowPartial:partial];
        if (descendant != nil) {
            return descendant;
        }
    }

    return nil;
}
// NSString front end for -findChildWithAttribute:matchingName:inXMLNode:allowPartial:.
// Both strings are converted to UTF-8 C strings and the search starts at the
// first child of the wrapped node.
- (HTMLNode *)findChildWithAttribute:(NSString *)attribute matchingName:(NSString *)className allowPartial:(BOOL)partial {
    const char *attributeName = [attribute UTF8String];
    const char *wantedValue = [className UTF8String];
    return [self findChildWithAttribute:attributeName
                           matchingName:wantedValue
                              inXMLNode:_node->children
                           allowPartial:partial];
}
// Returns the first descendant whose "class" attribute content equals
// `className` exactly (partial matching is switched off).
- (HTMLNode *)findChildOfClass:(NSString *)className {
    const char *wantedClass = [className UTF8String];
    return [self findChildWithAttribute:"class"
                           matchingName:wantedClass
                              inXMLNode:_node->children
                           allowPartial:NO];
}
// NSString front end for
// -findChildrenWithAttribute:matchingName:inXMLNode:inArray:allowPartial:.
// Both strings are converted to UTF-8 C strings, the search starts at the
// first child of the wrapped node, and the collected wrappers are returned.
- (NSArray *)findChildrenWithAttribute:(NSString *)attribute matchingName:(NSString *)className allowPartial:(BOOL)partial {
    NSMutableArray *matches = [NSMutableArray array];
    const char *attributeName = [attribute UTF8String];
    const char *wantedValue = [className UTF8String];

    [self findChildrenWithAttribute:attributeName
                       matchingName:wantedValue
                          inXMLNode:_node->children
                            inArray:matches
                       allowPartial:partial];
    return matches;
}
// Returns all descendants whose "class" attribute content equals `className`
// exactly (partial matching is switched off).
- (NSArray *)findChildrenOfClass:(NSString *)className {
    NSArray *matches = [self findChildrenWithAttribute:@"class"
                                          matchingName:className
                                          allowPartial:NO];
    return matches;
}
// Initialises the wrapper with the given libxml2 node.
// The pointer is stored as-is in _node; it is neither copied nor checked.
- (id)initWithXMLNode:(xmlNode *)xmlNode {
    self = [super init];
    if (self == nil) {
        return nil;
    }

    _node = xmlNode;
    return self;
}
// Appends the content of `node`, of its following siblings and, recursively,
// of all their descendants to `string`.
//
//   string - buffer that receives the text; a node's own content is appended
//            before the content of its descendants.
//   node   - first node to visit; NULL means there is nothing to append.
//
// Content that cannot be decoded as UTF-8 is skipped.
- (void)appendChildContentsToString:(NSMutableString *)string inNode:(xmlNode *)node {
    for (xmlNode *cur_node = node; cur_node != NULL; cur_node = cur_node->next) {
        if (cur_node->content != NULL) {
            NSString *text = [NSString stringWithCString:(const char *)cur_node->content
                                                encoding:NSUTF8StringEncoding];
            // The conversion yields nil for bytes that are not valid UTF-8, and
            // -appendString: must not be given nil.
            if (text != nil) {
                [string appendString:text];
            }
        }

        [self appendChildContentsToString:string inNode:cur_node->children];
    }
}
// Returns the content of the first child of the wrapped node decoded as
// UTF-8, or nil when there is no first child or it has no content.
// Later children are not consulted.
- (NSString *)contents {
    xmlNode *first = _node->children;
    if (!first || !first->content) {
        return nil;
    }

    return [NSString stringWithCString:(void *)first->content encoding:NSUTF8StringEncoding];
}
// Maps a node's tag name to an HTMLNodeType.
//
//   _node - node to classify; may be NULL.
//
// Returns HTMLUnkownNode for a NULL node, a node without a name, or a name
// that is not in the table below. Names are compared with strcmp, so the
// match is exact.
HTMLNodeType nodeType(xmlNode * _node)
{
    if (_node == NULL || _node->name == NULL)
        return HTMLUnkownNode;

    static const struct {
        const char *tag;
        HTMLNodeType type;
    } kTagTypes[] = {
        { "a",          HTMLHrefNode },
        { "text",       HTMLTextNode },
        { "code",       HTMLCodeNode },
        { "span",       HTMLSpanNode },
        { "p",          HTMLPNode },
        { "ul",         HTMLUlNode },
        { "li",         HTMLLiNode },
        // The HTML image element is spelled "img"; "image" is still accepted
        // so existing callers keep getting the same answer for it.
        { "img",        HTMLImageNode },
        { "image",      HTMLImageNode },
        { "ol",         HTMLOlNode },
        { "strong",     HTMLStrongNode },
        { "pre",        HTMLPreNode },
        { "blockquote", HTMLBlockQuoteNode },
    };

    const char *tagName = (const char *)_node->name;
    for (size_t i = 0; i < sizeof(kTagTypes) / sizeof(kTagTypes[0]); i++) {
        if (strcmp(tagName, kTagTypes[i].tag) == 0)
            return kTagTypes[i].type;
    }

    return HTMLUnkownNode;
}
// Returns the HTMLNodeType of the wrapped node as computed by nodeType().
- (HTMLNodeType)nodetype {
    HTMLNodeType type = nodeType(_node);
    return type;
}
// Returns the text obtained from xmlNodeGetContent() for `node`, decoded as
// UTF-8. Returns nil for a NULL node and @"" when libxml2 reports no content.
// The libxml2 buffer is released with xmlFree() once it has been converted.
NSString * allNodeContents(xmlNode*node)
{
    if (!node) {
        return nil;
    }

    void *bytes = xmlNodeGetContent(node);
    if (!bytes) {
        return @"";
    }

    NSString *text = [NSString stringWithCString:bytes encoding:NSUTF8StringEncoding];
    xmlFree(bytes);
    return text;
}
// Returns the text of the wrapped node as produced by allNodeContents().
- (NSString *)allContents {
    NSString *text = allNodeContents(_node);
    return text;
}
// Serialises `node` as HTML markup and returns it as an NSString.
//
//   node - node to dump with htmlNodeDumpOutput(); its owning document
//          supplies the encoding name passed to the serializer.
//
// Returns nil when `node` is NULL, when it does not belong to a document,
// when a libxml2 buffer cannot be created, when nothing was written, or when
// the output cannot be decoded as UTF-8.
NSString * rawContentsOfNode(xmlNode * node)
{
    // node->doc is dereferenced below, so both pointers are required.
    if (node == NULL || node->doc == NULL) {
        return nil;
    }

    xmlBufferPtr buffer = xmlBufferCreateSize(1000);
    if (buffer == NULL) {
        return nil;
    }

    xmlOutputBufferPtr buf = xmlOutputBufferCreateBuffer(buffer, NULL);
    if (buf == NULL) {
        xmlBufferFree(buffer);
        return nil;
    }

    htmlNodeDumpOutput(buf, node->doc, node, (const char *)node->doc->encoding);

    // Push everything still pending in the output layer into `buffer`.
    xmlOutputBufferFlush(buf);

    NSString *string = nil;
    const xmlChar *bytes = xmlBufferContent(buffer);
    if (bytes != NULL) {
        string = [[NSString alloc] initWithBytes:(const void *)bytes
                                          length:(NSUInteger)xmlBufferLength(buffer)
                                        encoding:NSUTF8StringEncoding];
    }

    // The bytes have been copied into `string`; release both libxml2 objects.
    xmlOutputBufferClose(buf);
    xmlBufferFree(buffer);

    return string;
}
// Returns the HTML markup of the wrapped node as produced by
// rawContentsOfNode().
- (NSString *)rawContents {
    NSString *markup = rawContentsOfNode(_node);
    return markup;
}
- @end
|