123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 |
'use strict';
// config.js is loaded for its side effects only. This script later uses
// Config, LOGGER, ROOTPATH, FILE, FinalRepinTime and FinalRepinFile without
// declaring them, so they are presumably installed as globals by that module
// (TODO confirm).
require('./config.js');
global.Request = require('request');

// Crawl state shared by every Task instance and by finish().
let lastPinTime = 0;      // max_repin_time of the last list page fetched (0 = nothing fetched yet)
let firstRepinTime = 0;   // repin_time of the first entry on the first list page of this run
let currentTask = null;   // the Task that is currently running
let articleCount = 0;     // number of articles stored during this run

// NOTE(review): not referenced anywhere else in this file; the pool below is
// created from Config.mysqlConfig directly.
const mysqlConfig = {
    pool: {
        name: 'TouTiaoFav',
        maxconn: 5
    },
    db: Config.mysqlConfig
};

// SECURITY NOTE(review): this is a hard-coded session cookie (sessionid, sid_tt,
// uid_tt, ...) committed to source. It should be loaded from configuration or
// the environment and rotated. The same value is sent in both cookie headers.
const sessionCookie = 'install_id=20457824635; ttreq=1$76925223765b5c30e70d4d35057e7c8d0f6abce6; alert_coverage=20; qh[360]=1; _ga=GA1.2.1897556923.1505932678; sessionid=6a2793f3363dc9444631d6ea4a909996; sid_guard=6a2793f3363dc9444631d6ea4a909996%7C1514306238%7C2592000%7CThu%2C+25-Jan-2018+16%3A37%3A18+GMT; sid_tt=6a2793f3363dc9444631d6ea4a909996; uid_tt=72dc0018060086d5c191476eaa449db1; login_flag=dfcae148f63adb82f58df7993787ba70; odin_tt=9715d5e76f01d8bfac4f8518405e218913dde1adb7dd677f9ec02c4603d5034af08b0568d8c103c5d264d57e1d62cd08; UM_distinctid=15eebdefca9236-0e5d58bc43fb0e-2834516c-4a640-15eebdefcaa9a2';

// HTTP headers attached to every list and article request.
const headers = {
    'Host': 'www.toutiao.com',
    'User-Agent': 'Paw/3.1 (Macintosh; OS X/10.12.6) GCDHTTPRequest',
    'Cookie': sessionCookie,
    'X-SS-Cookie': sessionCookie
};

global.MysqlPool = require(ROOTPATH + '/lib/mysql-pool.js').instance(Config.mysqlConfig);

// Entry point: verify the database connection, then start crawling.
testMysql();
/**
 * Verifies the MySQL configuration by running `SELECT VERSION()` on a pooled
 * connection.
 *
 * On success it logs the server version, installs `global.MysqlDB` and calls
 * `init()` to start the crawl. On any failure (no connection could be
 * obtained, or the probe query failed) it logs the error and does nothing
 * else, so the crawl is never started against a broken database.
 */
function testMysql(){
    MysqlPool.getConnection(function(connErr,con){
        // The pool reports connection failures through the first argument;
        // without this guard `con` is undefined and `con.query` would throw.
        if(connErr || !con){
            LOGGER.error('Mysql Connect error,please recheck your config');
            LOGGER.error(connErr || new Error('Mysql pool returned no connection'));
            return;
        }
        // NOTE(review): the connection is never handed back to the pool here.
        // The release API of lib/mysql-pool.js is not visible from this file,
        // so nothing is called - confirm whether a release is required.
        con.query('SELECT VERSION() as version',function(err,result,fields){
            if(err){
                LOGGER.error('Mysql Connect error,please recheck your config');
                LOGGER.error(err);
                return;
            }
            LOGGER.info('Mysql Connect success');
            LOGGER.info(`Mysql Version: ${result[0]['version']} | User: ${Config.mysqlConfig.user} | Database: ${Config.mysqlConfig.database}`);
            global.MysqlDB = require(ROOTPATH + '/lib/mysqldb.js');
            init();
        });
    });
}
/**
 * Starts a crawl: logs the repin timestamp recorded by the previous run (raw
 * value plus its Date rendering, treating the value as seconds) and creates
 * the first Task, which begins fetching immediately.
 */
function init(){
    const previousRun = new Date(FinalRepinTime*1000);
    const banner = `Last Pin Time: ${FinalRepinTime}|${previousRun}`;
    LOGGER.info(banner);
    currentTask = new Task();
}
/**
 * Ends the crawl.
 *
 * Records the repin time of the newest favourite seen in this run
 * (`firstRepinTime`) as the new `FinalRepinTime`, persists it to
 * `FinalRepinFile`, logs a summary with the number of stored articles and
 * shuts the MySQL pool down.
 */
function finish(){
    FinalRepinTime = firstRepinTime;
    // FILE is presumably Node's fs module (TODO confirm). Its writeFileSync
    // only accepts strings / buffers as data on current Node.js releases, so
    // the numeric timestamp is stringified explicitly instead of relying on
    // the implicit coercion older releases performed.
    FILE.writeFileSync(FinalRepinFile,String(FinalRepinTime),'UTF-8');
    LOGGER.info('--------------------------------------------------------------');
    LOGGER.info('All fav article download finished!!!');
    LOGGER.info(`Article analysis count [ ${articleCount} ]`);
    LOGGER.info('--------------------------------------------------------------');
    MysqlPool.end();
}
/**
 * Processes one page of the favourites list.
 *
 * Constructing a Task immediately requests one list page (continuing from the
 * shared `lastPinTime`), then walks the returned entries one at a time:
 * download the article page, extract its content (gallery image URLs, video
 * share URL or article body) and insert it into the `list` and `article`
 * tables. When the page is exhausted it either starts a new Task for the next
 * page (`has_more`) or calls `finish()`. Reaching an entry whose `repin_time`
 * equals `FinalRepinTime` also calls `finish()`, because everything from
 * there on was stored by an earlier run.
 *
 * Side effects: updates the shared `lastPinTime`, `firstRepinTime`,
 * `currentTask` and `articleCount` variables.
 *
 * Failure policy: a failed request or insert is logged and the crawl stops
 * without calling `finish()`, so `FinalRepinTime` is not advanced and the
 * next run retries the same entries.
 */
function Task(){
    let articleList = [];
    let current = -1;
    let hasMore = false;

    getList();

    /**
     * Renders a value as a double-quoted SQL string literal.
     * Backslashes and double quotes are escaped so the value cannot terminate
     * the literal; null / undefined become an empty string.
     * NOTE(review): assumes MySQL's default sql_mode (backslash escapes on,
     * ANSI_QUOTES off), which the original double-quoted literals already
     * relied on. Switch to placeholders if MysqlDB.query supports them.
     */
    function sqlString(value){
        const text = value == null ? '' : String(value);
        return `"${text.replace(/[\\"]/g, '\\$&')}"`;
    }

    /** Fetches one list page and starts processing its entries. */
    function getList(){
        const args = [
            'page_type=2',
            `user_id=${Config.uid}`,
            'max_behot_time=0',
            'count=20'
        ];
        if(lastPinTime>0) args.push(`max_repin_time=${lastPinTime}`);
        const url = Config.listUrl + args.join('&');
        Request({url:url,encoding: 'utf-8',json:true,headers:headers},function(err,response,body){
            if(err){
                LOGGER.error(`List get error,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                LOGGER.error(err);
                return;
            }
            // With json:true a non-JSON answer arrives as a plain string, and
            // an error payload may lack `data`; both used to crash below.
            if(!body || !Array.isArray(body.data)){
                LOGGER.error(`List response malformed,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                return;
            }
            if(lastPinTime==0){
                if(body.data.length===0){
                    // Nothing to download. finish() is deliberately NOT called:
                    // it would overwrite FinalRepinTime with the initial 0.
                    LOGGER.error('List is empty,nothing to download');
                    LOGGER.error(`URL:${url}`);
                    return;
                }
                firstRepinTime = body.data[0].repin_time;
                // Loose equality on purpose: FinalRepinTime comes from outside
                // this file and may be a string.
                if(FinalRepinTime==firstRepinTime){
                    finish();
                    return;
                }
            }
            hasMore = body.has_more;
            lastPinTime = body.max_repin_time;
            articleList = body.data;
            getArticle();
        });
    }

    /**
     * Turns the captured `gallery:` expression into an array of image URLs.
     * Throws when the expression cannot be parsed; the caller handles that.
     */
    function parseGallery(source){
        let images;
        if(/JSON\.parse\("(.*)"\)/.test(source)){
            // SECURITY NOTE(review): this evaluates text taken from a remote
            // page, i.e. remote code execution if that page is hostile or
            // tampered with. Kept for compatibility; should be replaced by
            // decoding the string literal and calling JSON.parse on it.
            const evaluated = eval(source);
            // If the evaluated value carries a `sub_images` array (the shape
            // the plain-JSON branch below expects), use it; otherwise iterate
            // the value itself as before.
            images = (evaluated && Array.isArray(evaluated.sub_images)) ? evaluated.sub_images : evaluated;
        }else{
            images = JSON.parse(source).sub_images;
        }
        if(images == null) return [];
        return Object.keys(images).map(function(key){
            return images[key].url;
        });
    }

    /**
     * Extracts the storable content of an article from its page source.
     * Returns '' when nothing usable is found.
     */
    function extractContent(article,page){
        let found;
        if(article.has_gallery){
            found = page.match(/gallery: (.*),/);
            if(!found) return '';
            try{
                return parseGallery(found[1]).join(',');
            }catch(parseErr){
                // An unparsable gallery must not kill the whole crawl.
                LOGGER.error(`Gallery parse error,article id:${article.item_id}`);
                LOGGER.error(parseErr);
                return '';
            }
        }
        if(article.has_video){
            found = page.match(/shareUrl: '(.*)'/);
            return found ? found[1] : '';
        }
        found = page.match(/content: '(.*)'\.replace/);
        return found ? found[1] : '';
    }

    /** Advances to the next list entry, downloads it and hands it to insertArticle. */
    function getArticle(){
        current++;
        if(current===articleList.length){
            return taskFinish();
        }
        const article = articleList[current];
        // Loose equality on purpose, see getList().
        if(FinalRepinTime == article.repin_time){
            finish();
            return;
        }
        article.content = '';
        const url = Config.articleUrl + article.item_id;
        Request({url:url,encoding: 'utf-8',headers:headers},function(err,response,body){
            if(err){
                LOGGER.error(`Article get error,max repin time:${lastPinTime},article id:${article.item_id}`);
                LOGGER.error(`URL:${url}`);
                LOGGER.error(err);
                return;
            }
            article.content = extractContent(article,typeof body === 'string' ? body : '');
            if(article.content===''){
                // Nothing extractable: skip this entry and move on.
                getArticle();
                return;
            }
            insertArticle(article);
        });
    }

    /** Stores the list row and then the content row, then continues with the next entry. */
    function insertArticle(article){
        const type = article.has_video ? 2 : (article.has_gallery ? 1 : 0);
        // Emitted unquoted, so force it to be a number.
        const galleryCount = Number(article.gallary_image_count) || 0;
        const values = [
            sqlString(article.title),
            type,
            sqlString(article.chinese_tag),
            sqlString(article.image_url),
            sqlString(article.abstract),
            galleryCount,
            sqlString(article.display_url),
            sqlString(article.item_id)
        ];
        const listSql = `insert IGNORE into list (title,type,tag,cover,abstract,gallaryCount,source,item_id) values(${values.join(',')})`;
        MysqlDB.query(listSql,function(err,result){
            if(err){
                LOGGER.error(`Article insert error,article id:${article.item_id}`);
                LOGGER.error(`SQL:${listSql}`);
                LOGGER.error(err);
                return;
            }
            const articleSql = `insert ignore into article (articleID,content) values(${sqlString(article.item_id)},${sqlString(article.content)})`;
            MysqlDB.query(articleSql,function(err,result){
                if(err){
                    LOGGER.error(`Article insert error,article id:${article.item_id}`);
                    LOGGER.error(`SQL:${articleSql}`);
                    LOGGER.error(err);
                    return;
                }
                articleCount++;
                getArticle();
            });
        });
    }

    /** Called when every entry of this page is done: continue with the next page or finish. */
    function taskFinish(){
        LOGGER.info('List Finished!');
        if(!hasMore){
            finish();
            return;
        }
        LOGGER.info('More List read!!!');
        currentTask = new Task();
    }
}
|