我们这些站长们都想自己的网站在搜索引擎中有优异的表现,前十是现阶段的梦想,前三是终极梦想。做国内seo的朋友自然不愿意放过任何一个网页被百度索引的机会,没办法,谁让百度一家独大呢?
排名在后,收录在前,收录在后,抓取在前。要想被收录,就得先被抓取,抓取网页是前提,百度站长平台(ziyuan.baidu.com)提供了链接提交通道,不过不幸的是,手动提交每次仅限20条url,对于一个稍大点的网站,每天产生的页面比较多,就比较麻烦了。虽然百度也提供了js自动提交的方式,但前提是该页面必须被访问才能触发提交行为,莫不是,每发布一篇,就得浏览一番,费劲啊。
有没有高效的办法呢?有的,百度也意识到上述问题,于是推出了api实时主动推送的服务,这个功能确实强大,可以将新发布的、未收录的网址链接收集起来,统统发给百度,通知蜘蛛尽快抓取,不过需要有编程能力才能实现。
对于我这样一名会写些程序、又干过seo的人来说,这并不是很难的问题,百度提供了post、curl、php、ruby四种调用接口的方式,惟独没有java,考虑到性能问题,决定使用java来写这个工具。既然百度没有提供,就只能自己动手了,参考了一下post的用例,直接用Java写了出来,经测试,可以实现批量提交。现将源代码公布出来,有需要的朋友可以拿去使用。环境为:java8,用到了alibaba的fastjson解析json数据,fastJson地址:https://github.com/alibaba/fastjson,并将百度提供的提交状态异常提示信息转换成中文,直观易懂。源代码如下:
package com.yangshengliang.baidutuisong;

import com.alibaba.fastjson.JSONObject;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Vector;

/**
 * Tool that actively submits page URLs not yet indexed by Baidu to the Baidu
 * link-submission (push) API.
 *
 * @author fedkey
 * @date 2018.10.25
 * @url www.yangshengliang.com
 */
public class PostUrls {

    /**
     * Push API endpoint. Replace the placeholder with the interface address
     * obtained from the Baidu webmaster platform.
     */
    private static String zzApiUrl = "接口调用地址";

    /**
     * Submits a batch of URLs to the push API and returns a human-readable
     * (Chinese) description of the outcome.
     *
     * <p>{@code null} entries and entries that are blank after trimming are
     * skipped. When nothing is left to submit, no request is sent and an empty
     * string is returned.
     *
     * <p>Failures while talking to the server are reported on standard error
     * and do not propagate; in that case an empty string is returned. The
     * {@code throws} clause is kept so existing callers keep compiling.
     *
     * @param urls the URLs to submit, one request line per URL; may be {@code null}
     * @return the outcome message, or an empty string if nothing was submitted
     *         or the request failed
     * @throws IOException declared for backward compatibility with existing callers
     */
    public static String postUrl(Vector<String> urls) throws IOException {
        String payload = buildPayload(urls);
        if (payload.isEmpty()) {
            // Nothing to submit: skip the network round-trip entirely.
            return "";
        }

        try {
            HttpURLConnection connection = (HttpURLConnection) new URL(zzApiUrl).openConnection();
            connection.setRequestProperty("Content-Type", "text/plain");
            connection.setRequestProperty("User-Agent", "curl/7.12.1");
            connection.setRequestProperty("Host", "data.zz.baidu.com");
            connection.setDoInput(true);
            // Enabling output turns the request into a POST carrying the URL list.
            connection.setDoOutput(true);

            // Send the request body; an explicit charset avoids depending on the platform default.
            try (PrintWriter writer = new PrintWriter(
                    new OutputStreamWriter(connection.getOutputStream(), StandardCharsets.UTF_8))) {
                writer.print(payload);
                writer.flush();
            }

            int statusCode = connection.getResponseCode();
            switch (statusCode) {
                case HttpURLConnection.HTTP_OK:
                    return describeSuccessBody(readBody(connection));
                case HttpURLConnection.HTTP_BAD_REQUEST:
                    return "站点未在站长平台验证";
                case HttpURLConnection.HTTP_UNAUTHORIZED:
                    return "接口调用地址 错误";
                case HttpURLConnection.HTTP_NOT_FOUND:
                    return "接口地址填写错误";
                case HttpURLConnection.HTTP_INTERNAL_ERROR:
                    return "服务器偶然异常,通常重试就会成功";
                default:
                    return "未知错误";
            }
        } catch (IOException | RuntimeException e) {
            // Best-effort tool: report the problem but keep the caller running.
            e.printStackTrace();
            return "";
        }
    }

    /** Joins the non-null, non-blank URLs into a newline-terminated request body. */
    private static String buildPayload(Collection<String> urls) {
        StringBuilder payload = new StringBuilder();
        if (urls == null) {
            return "";
        }
        for (String url : urls) {
            if (url == null) {
                continue;
            }
            // Strip surrounding whitespace so each line holds exactly one URL.
            String trimmed = url.trim();
            if (!trimmed.isEmpty()) {
                payload.append(trimmed).append('\n');
            }
        }
        return payload.toString();
    }

    /** Reads the whole response body as UTF-8 text, concatenating its lines. */
    private static String readBody(HttpURLConnection connection) throws IOException {
        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        }
        return body.toString();
    }

    /** Converts the JSON body of a 200 response into the outcome message. */
    private static String describeSuccessBody(String body) {
        JSONObject json = JSONObject.parseObject(body);
        if (json == null) {
            // Empty or unparsable-as-object body: hand back whatever was received.
            return body;
        }

        StringBuilder message = new StringBuilder();
        // "success" is treated as the number of accepted URLs (per Baidu's push API
        // description), so any positive count means the batch went through.
        int acceptedCount = json.getIntValue("success");
        message.append(acceptedCount > 0 ? "提交成功" : "提交失败,");

        // "not_same_site" is only meaningful when it actually lists rejected URLs;
        // an absent key or an empty array must not trigger the mismatch warning.
        Object rejected = json.get("not_same_site");
        if (rejected instanceof Collection && !((Collection<?>) rejected).isEmpty()) {
            message.append("接口调用地址与提交的网址不匹配");
        }
        return message.toString();
    }
}