|
@@ -0,0 +1,174 @@
|
|
|
+'use strict'
|
|
|
+require('./config.js');
|
|
|
+global.Request = require('request');
|
|
|
+
|
|
|
// Crawl state shared by init(), finish() and Task().
let lastPinTime = 0;      // sent as max_repin_time on list requests once it is > 0
let firstRepinTime = 0;   // repin_time of the first entry on the first page
let currentTask = null;   // the Task instance currently running
let articleCount = 0;     // number of articles stored during this run

// Connection pool settings handed to lib/mysql-pool.js.
const mysqlConfig = {
    pool: {
        name: 'TouTiaoFav',
        maxconn: 5
    },
    db: Config.mysqlConfig
};

// NOTE(review): this is a hard-coded session cookie committed to source;
// consider loading it from configuration instead.
// The same value is sent in both the Cookie and X-SS-Cookie headers.
const sessionCookie = 'install_id=12936422119; ttreq=1$1176d4ec1d8c4b340741b2ba75742f13f3581fa0; alert_coverage=56; _ga=GA1.2.1364790727.1494317820; _gid=GA1.2.1734828197.1502001335; qh[360]=1; login_flag=0f67bb380c54dc63fb41864f7f44160a; sessionid=7499554ce9453e80a602a56388e65d7b; sid_guard="7499554ce9453e80a602a56388e65d7b|1501589523|2591997|Thu\\054 31-Aug-2017 12:12:00 GMT"; sid_tt=7499554ce9453e80a602a56388e65d7b; uid_tt=bf0e5ab2d584acefc18547a78fada6b0';

// HTTP headers attached to every list and article request.
const headers = {
    'Host': 'www.toutiao.com',
    'User-Agent': 'Paw/3.1 (Macintosh; OS X/10.12.6) GCDHTTPRequest',
    'Cookie': sessionCookie,
    'X-SS-Cookie': sessionCookie
};

// Create the shared pool, then verify connectivity; testMysql() starts the
// crawl through init() once the probe query succeeds.
global.MysqlPool = require(ROOTPATH + '/lib/mysql-pool.js').instance(mysqlConfig);
testMysql();
|
|
|
/**
 * Runs a probe query against MySQL to validate the configured connection.
 *
 * The connection is returned to the pool as soon as the query completes.
 * On failure the error is logged and nothing else happens; on success the
 * server version is logged, lib/mysqldb.js is loaded into global.MysqlDB and
 * init() is called.
 */
function testMysql(){
    const connection = MysqlPool.getConnection(mysqlConfig.pool.name);
    connection.query('SELECT VERSION() as version',(queryError,rows) => {
        MysqlPool.freeConnection(mysqlConfig.pool.name,connection);

        if(queryError){
            LOGGER.error('Mysql Connect error,please recheck your config');
            LOGGER.error(queryError);
            return;
        }

        const dbSettings = Config.mysqlConfig;
        LOGGER.info('Mysql Connect success');
        LOGGER.info(`Mysql Version: ${rows[0]['version']} | User: ${dbSettings.user} | Database: ${dbSettings.database}`);
        global.MysqlDB = require(ROOTPATH + '/lib/mysqldb.js');
        init();
    });
}
|
|
|
+
|
|
|
/**
 * Logs the stored checkpoint (FinalRepinTime, shown both raw and as a Date
 * built from it as seconds) and starts the first Task.
 */
function init(){
    const checkpointDate = new Date(FinalRepinTime*1000);
    LOGGER.info(`Last Pin Time: ${FinalRepinTime}|${checkpointDate}`);

    currentTask = new Task();
}
|
|
|
+
|
|
|
/**
 * Ends the run: stores firstRepinTime as the new checkpoint in
 * FinalRepinTime, writes it to FinalRepinFile, logs a summary with the
 * number of stored articles, and closes the MySQL pool.
 */
function finish(){
    FinalRepinTime = firstRepinTime;
    // firstRepinTime normally comes from a JSON number. Assuming FILE is
    // Node's fs module (TODO confirm), writeFileSync on Node >= 14 throws for
    // non-string/Buffer data, so the value is converted explicitly. Older
    // Node versions coerced it to the same text.
    FILE.writeFileSync(FinalRepinFile,String(FinalRepinTime),'UTF-8');
    LOGGER.info('--------------------------------------------------------------');
    LOGGER.info('All fav article download finished!!!');
    LOGGER.info(`Article analysis count [ ${articleCount} ]`);
    LOGGER.info('--------------------------------------------------------------');
    MysqlPool.end();
}
|
|
|
+
|
|
|
/**
 * Processes one page of the favourites list.
 *
 * Invoked as `new Task()`. The constructor body immediately requests the
 * list page; the callbacks then download each article in order, store it via
 * MysqlDB, and finally either start a new Task (when the list reported
 * has_more) or call finish().
 *
 * Reads and updates the file-level lastPinTime, firstRepinTime, articleCount
 * and currentTask variables, and reads the FinalRepinTime checkpoint.
 *
 * When a request or an insert fails, the error is logged and processing stops
 * at that point without calling finish().
 */
function Task(){
    let articleList = [];
    let current = -1;       // index of the article in progress; getArticle() pre-increments
    let hasMore = false;

    getList();

    /** Requests one list page and hands its entries to getArticle(). */
    function getList(){
        const args = [
            'page_type=2',
            `user_id=${Config.uid}`,
            'max_behot_time=0',
            'count=20'
        ];
        if(lastPinTime>0) args.push(`max_repin_time=${lastPinTime}`);
        // presumably Config.listUrl already ends with '?' (or '&') — verify in config
        const url = Config.listUrl + args.join('&');

        Request({url:url,encoding: 'utf-8',json:true,headers:headers},function(err,response,body){
            if(err){
                LOGGER.error(`List get error,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                return;
            }
            // A non-JSON reply (body left as a string) or a payload without a
            // data array would otherwise throw inside this callback.
            if(!body || !Array.isArray(body.data)){
                LOGGER.error(`List response malformed,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                return;
            }

            // Loose equality is kept on purpose: these values come from
            // outside this block and their types are not established here.
            if(lastPinTime==0){
                if(body.data.length===0){
                    // Empty first page: nothing to download. Keep the existing
                    // checkpoint so finish() does not overwrite it with 0.
                    firstRepinTime = FinalRepinTime;
                    finish();
                    return;
                }
                firstRepinTime = body.data[0].repin_time;
                if(FinalRepinTime==firstRepinTime){
                    // Newest favourite equals the checkpoint: nothing new.
                    finish();
                    return;
                }
            }
            hasMore = body.has_more;
            lastPinTime = body.max_repin_time;
            articleList = body.data;
            getArticle();
        });
    }

    /**
     * Advances to the next list entry, downloads its page and extracts the
     * content: comma-joined image URLs for galleries, the share URL for
     * videos, otherwise the article body. Entries with no extractable content
     * are skipped.
     */
    function getArticle(){
        current++;
        if(current===articleList.length){
            return taskFinish();
        }
        const article = articleList[current];
        // Reaching the checkpoint entry ends the whole run (loose equality
        // kept, see getList).
        if(FinalRepinTime == article.repin_time){
            finish();
            return;
        }
        article.content = '';
        const url = Config.articleUrl + article.item_id;

        Request({url:url,encoding: 'utf-8',headers:headers},function(err,response,body){
            if(err){
                LOGGER.error(`Article get error,max repin time:${lastPinTime},article id:${article.item_id}`);
                LOGGER.error(`URL:${url}`);
                return;
            }
            let match;
            if(article.has_gallery){
                match = body.match(/gallery: (.*),/);
                if(match && match.length>=2){
                    // The captured text is scraped from the page, so a parse
                    // failure is logged and the article skipped instead of
                    // crashing the run.
                    try{
                        const images = JSON.parse(match[1]).sub_images;
                        if(Array.isArray(images)){
                            article.content = images.map(function(image){
                                return image.url;
                            }).join(',');
                        }
                    }catch(parseError){
                        LOGGER.error(`Gallery parse error,article id:${article.item_id}`);
                        LOGGER.error(parseError);
                    }
                }
            }else if(article.has_video){
                match = body.match(/shareUrl: '(.*)'/);
                if(match && match.length>=2) article.content = match[1];
            }else{
                match = body.match(/content: '(.*)'\.replace/);
                if(match && match.length>=2) article.content = (match[1]).replace(/\"/g,'\\"');
            }
            if(article.content==''){
                getArticle();
                return;
            }
            insertArticle(article);
        });
    }

    /**
     * Inserts the list row and then the article row, bumps articleCount and
     * moves on to the next entry.
     *
     * NOTE(review): both statements are built by string interpolation from
     * scraped data and only double quotes in title/abstract/content are
     * escaped. Switch to parameterised queries if MysqlDB.query supports
     * placeholders — its signature is not visible from this file.
     */
    function insertArticle(article){
        const type = article.has_video?2:article.has_gallery?1:0;
        const values = [
            `"${article.title.replace(/\"/g,'\\"')}"`,
            type,
            `"${article.chinese_tag}"`,
            `"${article.image_url}"`,
            `"${article.abstract.replace(/\"/g,'\\"')}"`,
            article.gallary_image_count,
            `"${article.display_url}"`,
            `"${article.item_id}"`
        ];
        const listSql = `insert IGNORE into list (title,type,tag,cover,abstract,gallaryCount,source,item_id) values(${values.join(',')})`;
        MysqlDB.query(listSql,function(err,result){
            if(err){
                LOGGER.error(`Article insert error,article id:${article.item_id}`);
                LOGGER.error(`SQL:${listSql}`);
                return;
            }
            const articleSql = `insert ignore into article (articleID,content) values("${article.item_id}","${article.content}")`;
            MysqlDB.query(articleSql,function(err,result){
                if(err){
                    LOGGER.error(`Article insert error,article id:${article.item_id}`);
                    LOGGER.error(`SQL:${articleSql}`);
                    return;
                }
                articleCount++;
                getArticle();
            });
        });
    }

    /** Called after the last entry of the page: start the next page or finish. */
    function taskFinish(){
        LOGGER.info('List Finished!');
        if(hasMore){
            LOGGER.info('More List read!!!');
            currentTask = null;
            currentTask = new Task();
        }else{
            finish();
        }
    }
}
|