//
//  HTMLParser.m
//  StackOverflow
//
//  Created by Ben Reeves on 09/03/2010.
//  Copyright 2010 Ben Reeves. All rights reserved.
//
- #import "HTMLParser.h"
@implementation HTMLParser

// Domain of every NSError produced by this class (same literal the original used).
static NSString * const kHTMLParserErrorDomain = @"HTMLParserdomain";

// Code 1: the caller supplied no usable input (nil/empty string, nil/empty data).
static const NSInteger kHTMLParserErrorCodeNoInput = 1;

// Code 2: libxml2 did not return a document for the supplied input.
static const NSInteger kHTMLParserErrorCodeParseFailed = 2;

// Encoding name handed to libxml2. Both initialisers have always requested
// UTF-8; the name used to be derived through CFStringGetCStringPtr(), which is
// allowed to return NULL, so the literal is passed directly instead.
static const char * const kHTMLParserInputEncoding = "UTF-8";

// Writes an error with the given code into *error, if the caller asked for one.
static void HTMLParserReportError(NSError **error, NSInteger code)
{
    if (error)
    {
        *error = [NSError errorWithDomain:kHTMLParserErrorDomain code:code userInfo:nil];
    }
}

#pragma mark - Document access

/// Returns a new HTMLNode wrapping the parsed document, or nil when no
/// document was parsed.
/// NOTE(review): presumably the node only borrows the libxml2 tree, which is
/// freed in -dealloc -- confirm against HTMLNode before keeping nodes around
/// longer than the parser.
- (HTMLNode *)doc
{
    if (_doc == NULL)
    {
        return nil;
    }

    return [[HTMLNode alloc] initWithXMLNode:(xmlNode *)_doc];
}

/// Returns the result of searching the document node for an "html" tag, or
/// nil when no document was parsed.
- (HTMLNode *)html
{
    // -doc is nil when nothing was parsed, and messaging nil yields nil.
    return [[self doc] findChildTag:@"html"];
}

/// Returns the result of searching the document node for a "head" tag, or
/// nil when no document was parsed.
- (HTMLNode *)head
{
    return [[self doc] findChildTag:@"head"];
}

/// Returns the result of searching the document node for a "body" tag, or
/// nil when no document was parsed.
- (HTMLNode *)body
{
    return [[self doc] findChildTag:@"body"];
}

#pragma mark - Initialisers

/// Parses HTML held in a string.
/// @param string The markup to parse. A nil or empty string is rejected.
/// @param error  Optional. Receives an error in domain "HTMLParserdomain":
///               code 1 for nil/empty input, code 2 when libxml2 returns no
///               document.
/// @return The parser. As before, an instance is returned even when nothing
///         could be parsed; its accessors then return nil.
- (id)initWithString:(NSString *)string error:(NSError **)error
{
    if (self = [super init])
    {
        _doc = NULL;

        if ([string length] > 0)
        {
            // Remove HTML_PARSE_NOERROR / HTML_PARSE_NOWARNING to see
            // libxml2's diagnostics while debugging.
            int options = HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING;

            // -UTF8String is NUL-terminated, which is what htmlReadDoc() expects.
            _doc = htmlReadDoc((const xmlChar *)[string UTF8String],
                               NULL,
                               kHTMLParserInputEncoding,
                               options);
            if (_doc == NULL)
            {
                HTMLParserReportError(error, kHTMLParserErrorCodeParseFailed);
            }
        }
        else
        {
            HTMLParserReportError(error, kHTMLParserErrorCodeNoInput);
        }
    }

    return self;
}

/// Parses HTML held in a data object. The bytes are read as UTF-8.
/// @param data  The markup to parse. Nil or zero-length data is rejected.
/// @param error Optional. Receives an error in domain "HTMLParserdomain":
///              code 1 for nil/empty input, code 2 when the data is too large
///              for libxml2's int-sized length or libxml2 returns no document.
/// @return The parser. As before, an instance is returned even when nothing
///         could be parsed; its accessors then return nil.
- (id)initWithData:(NSData *)data error:(NSError **)error
{
    if (self = [super init])
    {
        _doc = NULL;

        NSUInteger length = [data length];
        if (length == 0)
        {
            HTMLParserReportError(error, kHTMLParserErrorCodeNoInput);
        }
        else if (length > (NSUInteger)INT_MAX)
        {
            // htmlReadMemory() takes the size as an int.
            HTMLParserReportError(error, kHTMLParserErrorCodeParseFailed);
        }
        else
        {
            // NSData is not NUL-terminated, so the length must be passed
            // explicitly; htmlReadDoc() would read past the end of the buffer.
            _doc = htmlReadMemory((const char *)[data bytes],
                                  (int)length,
                                  "",
                                  kHTMLParserInputEncoding,
                                  HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
            if (_doc == NULL)
            {
                HTMLParserReportError(error, kHTMLParserErrorCodeParseFailed);
            }
        }
    }

    return self;
}

/// Loads the contents of a URL and parses them as HTML.
/// @param url   Location of the markup.
/// @param error Optional; may be NULL. Receives the loading error from NSData,
///              or a parsing error as described on -initWithData:error:.
/// @return nil when the contents could not be loaded, otherwise the parser.
- (id)initWithContentsOfURL:(NSURL *)url error:(NSError **)error
{
    NSData *contents = [[NSData alloc] initWithContentsOfURL:url options:0 error:error];

    // Judge success by the return value. The caller may pass NULL for error,
    // and *error is only meaningful after a failure.
    if (contents == nil)
    {
        return nil;
    }

    return [self initWithData:contents error:error];
}

#pragma mark - Teardown

/// Frees the libxml2 document, if one was parsed.
- (void)dealloc
{
    if (_doc)
    {
        xmlFreeDoc(_doc);
        _doc = NULL;
    }
}

@end
|