HTMLParser.m 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. //
  2. // HTMLParser.m
  3. // StackOverflow
  4. //
  5. // Created by Ben Reeves on 09/03/2010.
  6. // Copyright 2010 Ben Reeves. All rights reserved.
  7. //
  8. #import "HTMLParser.h"
  @implementation HTMLParser

  // Thin Objective-C wrapper around a libxml2 HTML document. The parsed
  // document is stored in the _doc ivar and released in -dealloc.

  // Error domain reported through the NSError out-parameters of the
  // initializers. The string value is part of the contract with callers.
  static NSString * const kHTMLParserErrorDomain = @"HTMLParserdomain";

  // Error codes used with kHTMLParserErrorDomain.
  static const NSInteger kHTMLParserErrorEmptyInput = 1;   // nil or empty string/data
  static const NSInteger kHTMLParserErrorParseFailed = 2;  // libxml2 produced no document

  // Encoding name handed to libxml2. The original code derived this through
  // CFStringGetCStringPtr(), which is allowed to return NULL; a NULL encoding
  // would silently switch libxml2 from "forced UTF-8" to auto-detection.
  static const char * const kHTMLParserEncoding = "UTF-8";

  // Stores an HTMLParser error with the given code in *error, if the caller
  // supplied somewhere to put it.
  static void HTMLParserSetError(NSError **error, NSInteger code)
  {
      if (error)
      {
          *error = [NSError errorWithDomain:kHTMLParserErrorDomain code:code userInfo:nil];
      }
  }

  #pragma mark - Document accessors

  /// Wraps the root of the parsed document in a new HTMLNode.
  /// @return A freshly allocated HTMLNode, or nil when no document was parsed.
  - (HTMLNode *)doc
  {
      if (_doc == NULL)
      {
          return nil;
      }
      return [[HTMLNode alloc] initWithXMLNode:(xmlNode *)_doc];
  }

  /// Looks up the "html" tag via -findChildTag: on the document node.
  /// @return The result of the lookup, or nil when no document was parsed.
  - (HTMLNode *)html
  {
      // -doc returns nil without a document and messaging nil yields nil,
      // so no separate _doc check is needed here.
      return [[self doc] findChildTag:@"html"];
  }

  /// Looks up the "head" tag via -findChildTag: on the document node.
  /// @return The result of the lookup, or nil when no document was parsed.
  - (HTMLNode *)head
  {
      return [[self doc] findChildTag:@"head"];
  }

  /// Looks up the "body" tag via -findChildTag: on the document node.
  /// @return The result of the lookup, or nil when no document was parsed.
  - (HTMLNode *)body
  {
      return [[self doc] findChildTag:@"body"];
  }

  #pragma mark - Initializers

  /// Parses HTML source held in a string.
  /// @param string The HTML text. nil or empty input is reported as error code 1.
  /// @param error  Optional out-parameter. Receives an error in the
  ///               "HTMLParserdomain" domain: code 1 for empty input, code 2
  ///               when libxml2 could not build a document.
  /// @return The receiver. For backward compatibility the receiver is returned
  ///         even when parsing fails; in that case -doc returns nil.
  - (id)initWithString:(NSString *)string error:(NSError **)error
  {
      self = [super init];
      if (self)
      {
          _doc = NULL;

          if ([string length] == 0)
          {
              HTMLParserSetError(error, kHTMLParserErrorEmptyInput);
              return self;
          }

          // Recover from malformed markup and keep libxml2 quiet. Drop the
          // NOERROR/NOWARNING flags to see the parser diagnostics.
          int options = HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING;

          // -UTF8String yields a NUL-terminated buffer, which is what
          // htmlReadDoc() requires.
          _doc = htmlReadDoc((const xmlChar *)[string UTF8String],
                             NULL,
                             kHTMLParserEncoding,
                             options);
          if (_doc == NULL)
          {
              HTMLParserSetError(error, kHTMLParserErrorParseFailed);
          }
      }
      return self;
  }

  /// Parses HTML source held in raw bytes, decoded as UTF-8.
  /// @param data  The HTML bytes. nil or empty data is reported as error code 1.
  /// @param error Optional out-parameter. Receives an error in the
  ///              "HTMLParserdomain" domain: code 1 for empty input, code 2
  ///              when libxml2 could not build a document.
  /// @return The receiver. For backward compatibility the receiver is returned
  ///         even when parsing fails; in that case -doc returns nil.
  - (id)initWithData:(NSData *)data error:(NSError **)error
  {
      self = [super init];
      if (self)
      {
          _doc = NULL;

          NSUInteger length = [data length];
          if (length == 0)
          {
              HTMLParserSetError(error, kHTMLParserErrorEmptyInput);
              return self;
          }

          // htmlReadMemory() takes an int size; refuse input that would be
          // truncated by the cast instead of parsing a wrong length.
          if (length > (NSUInteger)INT_MAX)
          {
              HTMLParserSetError(error, kHTMLParserErrorParseFailed);
              return self;
          }

          // NSData bytes are not NUL-terminated, so the length-based reader
          // must be used; htmlReadDoc() would scan past the end of the buffer.
          // Same option bits as the XML_PARSE_NOERROR | XML_PARSE_NOWARNING
          // flags used previously.
          _doc = htmlReadMemory((const char *)[data bytes],
                                (int)length,
                                "",
                                kHTMLParserEncoding,
                                HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
          if (_doc == NULL)
          {
              HTMLParserSetError(error, kHTMLParserErrorParseFailed);
          }
      }
      return self;
  }

  /// Loads the contents of a URL and parses them as HTML.
  /// @param url   The location to read from.
  /// @param error Optional out-parameter; may be NULL. Receives the NSData
  ///              loading error, or an HTMLParser error from -initWithData:error:.
  /// @return The receiver, or nil when the URL contents could not be loaded.
  - (id)initWithContentsOfURL:(NSURL *)url error:(NSError **)error
  {
      NSData *contents = [[NSData alloc] initWithContentsOfURL:url options:0 error:error];

      // Judge success by the return value only. The error pointer may be NULL
      // (dereferencing it would crash) and its pointee is only meaningful
      // after a failure.
      if (contents == nil)
      {
          return nil;
      }
      return [self initWithData:contents error:error];
  }

  #pragma mark - Lifecycle

  /// Releases the libxml2 document owned by this parser.
  - (void)dealloc
  {
      if (_doc)
      {
          xmlFreeDoc(_doc);
          _doc = NULL;
      }
  }

  @end