'use strict';

/**
 * Toutiao favourites downloader.
 *
 * Flow: verify the MySQL connection -> page through the account's favourite
 * list -> fetch every article page -> extract its content (gallery image
 * URLs, video share URL or article body) -> store it in the `list` and
 * `article` tables -> remember the newest repin time so that the next run
 * stops where this one started.
 *
 * The names Config, ROOTPATH, LOGGER, FILE, FinalRepinTime and FinalRepinFile
 * are not defined in this file; presumably ./config.js installs them as
 * globals - confirm there.
 */
require('./config.js');
global.Request = require('request');

// Crawl state shared by every Task instance.
let lastPinTime = 0;      // paging cursor: max_repin_time returned by the previous list page
let firstRepinTime = 0;   // repin time of the newest favourite, captured from the first page
let currentTask = null;   // the Task that is currently running
let articleCount = 0;     // number of articles stored during this run

// NOTE(review): this object is never read in this file (the pool below is
// built from Config.mysqlConfig directly). Kept in case it was meant to be
// the argument of instance() - confirm against lib/mysql-pool.js.
const mysqlConfig = {
    pool: {
        name: 'TouTiaoFav',
        maxconn: 5
    },
    db: Config.mysqlConfig
};

// SECURITY NOTE(review): these look like real session cookies committed to
// source control; they should probably live in configuration instead.
const headers = {
    'Host': 'www.toutiao.com',
    'User-Agent': 'Paw/3.1 (Macintosh; OS X/10.12.6) GCDHTTPRequest',
    'Cookie': 'install_id=20457824635; ttreq=1$76925223765b5c30e70d4d35057e7c8d0f6abce6; alert_coverage=20; qh[360]=1; _ga=GA1.2.1897556923.1505932678; sessionid=6a2793f3363dc9444631d6ea4a909996; sid_guard=6a2793f3363dc9444631d6ea4a909996%7C1514306238%7C2592000%7CThu%2C+25-Jan-2018+16%3A37%3A18+GMT; sid_tt=6a2793f3363dc9444631d6ea4a909996; uid_tt=72dc0018060086d5c191476eaa449db1; login_flag=dfcae148f63adb82f58df7993787ba70; odin_tt=9715d5e76f01d8bfac4f8518405e218913dde1adb7dd677f9ec02c4603d5034af08b0568d8c103c5d264d57e1d62cd08; UM_distinctid=15eebdefca9236-0e5d58bc43fb0e-2834516c-4a640-15eebdefcaa9a2',
    'X-SS-Cookie': 'install_id=20457824635; ttreq=1$76925223765b5c30e70d4d35057e7c8d0f6abce6; alert_coverage=20; qh[360]=1; _ga=GA1.2.1897556923.1505932678; sessionid=6a2793f3363dc9444631d6ea4a909996; sid_guard=6a2793f3363dc9444631d6ea4a909996%7C1514306238%7C2592000%7CThu%2C+25-Jan-2018+16%3A37%3A18+GMT; sid_tt=6a2793f3363dc9444631d6ea4a909996; uid_tt=72dc0018060086d5c191476eaa449db1; login_flag=dfcae148f63adb82f58df7993787ba70; odin_tt=9715d5e76f01d8bfac4f8518405e218913dde1adb7dd677f9ec02c4603d5034af08b0568d8c103c5d264d57e1d62cd08; UM_distinctid=15eebdefca9236-0e5d58bc43fb0e-2834516c-4a640-15eebdefcaa9a2'
};

global.MysqlPool = require(ROOTPATH + '/lib/mysql-pool.js').instance(Config.mysqlConfig);
testMysql();

/**
 * Quote a value as a double-quoted MySQL string literal.
 *
 * Backslashes and double quotes are backslash-escaped so that text coming
 * from the remote site can neither terminate the literal early nor be
 * altered by MySQL's escape processing; the value is stored verbatim.
 * Relies on backslash escapes being enabled on the server (the original
 * `\"` escaping already depended on that).
 *
 * @param {*} value - value to embed; converted with String().
 * @returns {string} the quoted literal, including the surrounding quotes.
 */
function sqlString(value) {
    return `"${String(value).replace(/[\\"]/g, '\\$&')}"`;
}

/**
 * Pull the storable content out of a downloaded article page.
 *
 * @param {Object} article - list entry; only has_gallery / has_video are read.
 * @param {*} body - response body of the article page.
 * @returns {string} comma-joined image URLs for galleries, the share URL for
 *     videos, the raw article body otherwise; '' when nothing was found.
 */
function extractContent(article, body) {
    if (typeof body !== 'string') {
        return '';
    }

    if (article.has_gallery) {
        const found = body.match(/gallery: (.*),/);
        if (!found) {
            return '';
        }
        let gallery = found[1];
        if (/JSON\.parse\("(.*)"\)/.test(gallery)) {
            // SECURITY NOTE(review): this executes text scraped from a remote
            // page. It is kept because the exact literal format of the page
            // cannot be verified from this file; replace it with a pure
            // decoder once the format is confirmed.
            gallery = eval(gallery);
        } else {
            gallery = JSON.parse(gallery).sub_images;
        }
        // A missing image list yields '' so that the caller skips the article.
        const keys = gallery == null ? [] : Object.keys(gallery);
        return keys.map((key) => gallery[key].url).join(',');
    }

    if (article.has_video) {
        const found = body.match(/shareUrl: '(.*)'/);
        return found ? found[1] : '';
    }

    const found = body.match(/content: '(.*)'\.replace/);
    return found ? found[1] : '';
}

/**
 * Check that MySQL is reachable, log the server version, then load the query
 * helper and start the crawl. Logs and stops when the connection or the
 * probe query fails.
 */
function testMysql() {
    MysqlPool.getConnection((connErr, con) => {
        // Without this guard a failed connection crashed on `con.query`
        // instead of reporting the configuration problem.
        if (connErr || !con) {
            LOGGER.error('Mysql Connect error,please recheck your config');
            LOGGER.error(connErr);
            return;
        }
        // NOTE(review): the probe connection is never handed back to the
        // pool; the release API of lib/mysql-pool.js is not visible here.
        con.query('SELECT VERSION() as version', (err, result) => {
            if (err) {
                LOGGER.error('Mysql Connect error,please recheck your config');
                LOGGER.error(err);
                return;
            }
            LOGGER.info('Mysql Connect success');
            LOGGER.info(`Mysql Version: ${result[0]['version']} | User: ${Config.mysqlConfig.user} | Database: ${Config.mysqlConfig.database}`);
            global.MysqlDB = require(ROOTPATH + '/lib/mysqldb.js');
            init();
        });
    });
}

/** Log the stop marker of the previous run and start the first Task. */
function init() {
    LOGGER.info(`Last Pin Time: ${FinalRepinTime}|${new Date(FinalRepinTime*1000)}`);
    currentTask = new Task();
}

/**
 * End the run: persist the newest repin time seen on the first page as the
 * stop marker for the next run, print a summary and close the MySQL pool.
 */
function finish() {
    FinalRepinTime = firstRepinTime;
    // Stringified explicitly: if FILE is Node's fs module, current Node
    // versions reject numeric data passed to writeFileSync.
    FILE.writeFileSync(FinalRepinFile, String(FinalRepinTime), 'UTF-8');
    LOGGER.info('--------------------------------------------------------------');
    LOGGER.info('All fav article download finished!!!');
    LOGGER.info(`Article analysis count [ ${articleCount} ]`);
    LOGGER.info('--------------------------------------------------------------');
    MysqlPool.end();
}

/**
 * Process one page of the favourite list: download the page, then fetch and
 * store its articles one after another. When the page is exhausted it either
 * starts a new Task for the next page or finishes the run.
 */
function Task() {
    let articleList = [];
    let current = -1;      // index of the article being processed
    let hasMore = false;   // has_more flag of the downloaded list page

    getList();

    /** Download one page of the favourite list, continuing after lastPinTime. */
    function getList() {
        const args = [
            'page_type=2',
            `user_id=${Config.uid}`,
            'max_behot_time=0',
            'count=20'
        ];
        if (lastPinTime > 0) {
            args.push(`max_repin_time=${lastPinTime}`);
        }
        const url = Config.listUrl + args.join('&');

        Request({ url: url, encoding: 'utf-8', json: true, headers: headers }, (err, response, body) => {
            if (err) {
                LOGGER.error(`List get error,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                return;
            }
            // A body without a data array (for example a non-JSON answer)
            // used to crash on body.data[0] / artticleList.length.
            if (!body || !Array.isArray(body.data)) {
                LOGGER.error(`List response malformed,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                return;
            }

            // Loose equality is intentional in the comparisons below: the
            // stored marker and the API values may differ in type.
            if (lastPinTime == 0) {
                if (body.data.length === 0) {
                    // Nothing favourited: leave the stored marker untouched.
                    LOGGER.info('Fav list is empty,nothing to download');
                    MysqlPool.end();
                    return;
                }
                firstRepinTime = body.data[0].repin_time;
                if (FinalRepinTime == firstRepinTime) {
                    finish();
                    return;
                }
            }

            hasMore = body.has_more;
            lastPinTime = body.max_repin_time;
            articleList = body.data;
            getArticle();
        });
    }

    /** Advance to the next article of the page, download it and store it. */
    function getArticle() {
        current++;
        if (current === articleList.length) {
            return taskFinish();
        }

        const article = articleList[current];
        // Reached the newest article of the previous run: everything older
        // is already stored.
        if (FinalRepinTime == article.repin_time) {
            finish();
            return;
        }

        article.content = '';
        const url = Config.articleUrl + article.item_id;

        Request({ url: url, encoding: 'utf-8', headers: headers }, (err, response, body) => {
            if (err) {
                LOGGER.error(`Article get error,max repin time:${lastPinTime},article id:${article.item_id}`);
                LOGGER.error(`URL:${url}`);
                return;
            }

            article.content = extractContent(article, body);
            if (article.content === '') {
                // Nothing extractable: skip this article.
                getArticle();
                return;
            }
            insertArticle(article);
        });
    }

    /**
     * Store the article metadata in `list` and its content in `article`,
     * then continue with the next article.
     *
     * @param {Object} article - list entry with `content` already filled in.
     */
    function insertArticle(article) {
        const type = article.has_video ? 2 : article.has_gallery ? 1 : 0;
        const values = [
            sqlString(article.title),
            type,
            sqlString(article.chinese_tag),
            sqlString(article.image_url),
            sqlString(article.abstract),
            // Forced to an integer so that a missing or odd value cannot
            // break or alter the statement.
            Number.parseInt(article.gallary_image_count, 10) || 0,
            sqlString(article.display_url),
            sqlString(article.item_id)
        ];
        const listSql = `insert IGNORE into list (title,type,tag,cover,abstract,gallaryCount,source,item_id) values(${values.join(',')})`;

        MysqlDB.query(listSql, (err) => {
            if (err) {
                LOGGER.error(`Article insert error,article id:${article.item_id}`);
                LOGGER.error(`SQL:${listSql}`);
                return;
            }

            const articleSql = `insert ignore into article (articleID,content) values(${sqlString(article.item_id)},${sqlString(article.content)})`;
            MysqlDB.query(articleSql, (contentErr) => {
                if (contentErr) {
                    LOGGER.error(`Article insert error,article id:${article.item_id}`);
                    LOGGER.error(`SQL:${articleSql}`);
                    return;
                }
                articleCount++;
                getArticle();
            });
        });
    }

    /** Called when every article of the page is handled: next page or finish. */
    function taskFinish() {
        LOGGER.info('List Finished!');
        if (hasMore) {
            LOGGER.info('More List read!!!');
            currentTask = new Task();
            return;
        }
        finish();
    }
}