当前位置:   article > 正文

Java 百度翻译小爬虫_java 百度翻译抓包翻译

java 百度翻译抓包翻译



一、分析一下

通过在谷歌浏览器上F12打开百度翻译分析其url发现

接口在这里插入图片描述
在这里插入图片描述几次调用发现 sign 是变动的,还有个 token,这两个是怎么获取的呢?
发现在进入翻译页面的时候,会先请求 https://fanyi.baidu.com/ 这个 url
在这里插入图片描述
在这里插入图片描述返回的是一个html
在这里插入图片描述

发现其中有 token,并且和调用翻译时的 token 一致;另外还有一个和 sign 相似的 gtk。通过查阅资料发现 sign 是通过一段 js 计算出来的,代码如下(r 为要翻译的内容,gtk 是从 html 中获取的):

/**
 * Mixes the 32-bit value `r` according to the op-string `o`.
 *
 * `o` is read in groups of three characters:
 *   [0] "+" -> add the shifted value (result masked to 32 bits), otherwise XOR it
 *   [1] "+" -> unsigned right shift, otherwise left shift
 *   [2] shift amount: a decimal digit, or a letter where "a" = 10, "b" = 11, ...
 * Trailing characters that do not form a complete group are ignored.
 */
function a(r, o) {
    var acc = r;
    var pos = 0;
    while (pos < o.length - 2) {
        var amountChar = o.charAt(pos + 2);
        var amount;
        if (amountChar >= "a") {
            amount = amountChar.charCodeAt(0) - 87;
        } else {
            amount = Number(amountChar);
        }

        var shifted;
        if (o.charAt(pos + 1) === "+") {
            shifted = acc >>> amount;
        } else {
            shifted = acc << amount;
        }

        if (o.charAt(pos) === "+") {
            acc = (acc + shifted) & 4294967295;
        } else {
            acc = acc ^ shifted;
        }
        pos += 3;
    }
    return acc;
}
// Most recent gtk handed to token(); consulted only when a call omits _gtk.
var C = null;
/**
 * Computes the `sign` form field for a translation request.
 *
 * Kept to ES5 syntax (var, no arrow functions) because the accompanying Java code loads this
 * script through javax.script.
 *
 * Fix over the earlier version: the gtk used to be latched into `C` on the first call, so a
 * different `_gtk` passed later was silently ignored (and a first call without a gtk pinned it
 * to "" for good). A supplied `_gtk` now always wins; `C` is only a fallback.
 *
 * @param {string} r     text to be translated
 * @param {string} _gtk  "<int>.<int>" seed taken from the page's window.gtk
 * @returns {string} "<n>.<m>" where n is the mixed hash modulo 1e6 and m is n XOR the first seed part
 */
var token = function(r, _gtk) {
    var o = r.length;
    // Inputs longer than 30 chars are sampled: first 10 + 10 around the middle + last 10.
    if (o > 30) {
        var mid = Math.floor(o / 2);
        r = r.slice(0, 10) + r.slice(mid - 5, mid + 5) + r.slice(-10);
    }

    if (_gtk) {
        C = _gtk;
    }
    var t = C || "";
    var e = t.split(".");
    var h = Number(e[0]) || 0;
    var i = Number(e[1]) || 0;

    // Encode r as UTF-8 bytes, combining surrogate pairs into a single 4-byte sequence.
    var d = [];
    for (var g = 0; g < r.length; g++) {
        var m = r.charCodeAt(g);
        if (m < 128) {
            d.push(m);
        } else if (m < 2048) {
            d.push(m >> 6 | 192, 63 & m | 128);
        } else {
            if (55296 === (64512 & m) && g + 1 < r.length && 56320 === (64512 & r.charCodeAt(g + 1))) {
                m = 65536 + ((1023 & m) << 10) + (1023 & r.charCodeAt(++g));
                d.push(m >> 18 | 240, m >> 12 & 63 | 128);
            } else {
                d.push(m >> 12 | 224);
            }
            d.push(m >> 6 & 63 | 128, 63 & m | 128);
        }
    }

    // Fold every byte into the running hash, then apply the final mixing round.
    var S = h;
    for (var s = 0; s < d.length; s++) {
        S = a(S + d[s], "+-a^+6");
    }
    S = a(S, "+-3^+b+-f") ^ i;

    // Reinterpret a negative int32 as its unsigned value before taking the modulus.
    if (S < 0) {
        S = (2147483647 & S) + 2147483648;
    }
    S %= 1e6;
    return S.toString() + "." + (S ^ h);
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31

知道这些我们就开始写代码吧

二、使用步骤

1.引入库

我使用的是 Java 语言:操作 HTML 需要引入 jsoup,解析返回的 JSON 需要引入阿里的 fastjson(下文代码还用到了 Apache HttpClient 发送 POST 请求,同样需要在项目中引入)。

  <!-- jsoup: used by the Java snippet to download the landing page and read its <script> tags. -->
  <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.12.1</version>
  </dependency>
   <!-- fastjson: used by the Java snippet to parse the JSON reply (JSONObject / JSONArray). -->
   <!-- NOTE(review): no <version> is given here; presumably it is managed by a parent POM or
        dependencyManagement section that is not shown. Confirm before copying this fragment. -->
   <!-- NOTE(review): the Java snippet also references Apache HttpClient classes (HttpClients,
        HttpPost); no dependency for them appears in this fragment. -->
   <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
    </dependency>
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9

2.具体代码

代码如下(示例):

 private static String baiduTranslate(String q, String from, String to) throws IOException, ScriptException {
        String mainpage_url = "https://fanyi.baidu.com/";
        // Jsoup 获取页面
        Document document = Jsoup.connect(mainpage_url).cookie("Cookie", "BAIDUID=1D8BC57A03641735D0F46872B391F36B; PSTM=1621752923; __yjs_duid=1_73eebc74c04c0586214b0074041092b91621754117386; REALTIME_TRANS_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDUSS=U1dmtTdlBaMG1MTHlQZWNkZnVCQm5vOHozVmdHdWcwajlzRjJBZS1-cU5WSEpoRVFBQUFBJCQAAAAAAAAAAAEAAABayrGns7257bntue0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAI3HSmGNx0phbX; BDUSS_BFESS=U1dmtTdlBaMG1MTHlQZWNkZnVCQm5vOHozVmdHdWcwajlzRjJBZS1-cU5WSEpoRVFBQUFBJCQAAAAAAAAAAAEAAABayrGns7257bntue0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAI3HSmGNx0phbX; BAIDUID=D1117626448036AD2AF919EC711025C3:FG=1; APPGUIDE_10_0_2=1; BDSFRCVID=P1IOJeC62w0oC0cHg4qyuRZb6V5Z9OQTH6aoVUmNkwmru95RKuk4EG0PhU8g0K4bGxQJogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tJPJVI82tCD3fP36qRbsMJ8thl63-4oX2TTKWjrJaDvaMKJOy4oTj6j30l3Mql37MI6Qo454yJ_-OMQp5UQj3MvB-fnlXJoUWGFHLU7lWpTpEI3OQft20MkEeMtjBMoaBGvILR7jWhvdhl72y-chQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCeJ6KfJJ4DoIv5b-0_HRT1Mt5Eh-cH-UnLqh_L02OZ0l8Ktt02DIjnhx7JjMFN5J5z5j5h-jTh2UomWIQHDUoXDfTI3TkDQnLfQfnt2aR4KKJx2UKWeIJoj-5n2h_phUJiBMAHBan7W45IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC8lj6t-D5oQepJf-K6a2CJ03JTs26rjDnCr05QzXUI8LNDH-5Oy0bR2an02-4ThVxcPjlDhW6Fg0JO7ttoyQHTL2Jv5a4ohbD5-ynoOjML1Db33L6vMtg0J3q3yLlcoepvoX55c3MkD5tjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKJC3P; BDSFRCVID_BFESS=P1IOJeC62w0oC0cHg4qyuRZb6V5Z9OQTH6aoVUmNkwmru95RKuk4EG0PhU8g0K4bGxQJogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; 
H_BDCLCKID_SF_BFESS=tJPJVI82tCD3fP36qRbsMJ8thl63-4oX2TTKWjrJaDvaMKJOy4oTj6j30l3Mql37MI6Qo454yJ_-OMQp5UQj3MvB-fnlXJoUWGFHLU7lWpTpEI3OQft20MkEeMtjBMoaBGvILR7jWhvdhl72y-chQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCeJ6KfJJ4DoIv5b-0_HRT1Mt5Eh-cH-UnLqh_L02OZ0l8Ktt02DIjnhx7JjMFN5J5z5j5h-jTh2UomWIQHDUoXDfTI3TkDQnLfQfnt2aR4KKJx2UKWeIJoj-5n2h_phUJiBMAHBan7W45IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC8lj6t-D5oQepJf-K6a2CJ03JTs26rjDnCr05QzXUI8LNDH-5Oy0bR2an02-4ThVxcPjlDhW6Fg0JO7ttoyQHTL2Jv5a4ohbD5-ynoOjML1Db33L6vMtg0J3q3yLlcoepvoX55c3MkD5tjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKJC3P; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; delPer=0; PSINO=3; BAIDUID_BFESS=D1117626448036AD2AF919EC711025C3:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1642991662,1643016044,1643016289,1643016293; H_PS_PSSID=35104_31254_35776_34584_35491_35797_35318_26350_35746; BA_HECTOR=258haka50ga42h00ib1guvce50r; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1643101993; ab_sr=1.0.1_MTg0OTU5NzFjYzk5NzFhOTIwMWRmOTkyNjc0MjIxZDhmY2UxYjVhZTRjZjljNzJmODdiYWUxYzc3ZTcyYzQzYTVkZmZmMTk0YzBmODQ4YTRlOTUwYzZjYjNhNDI4ZDg5ZTY3MTExNWVmNGZiMWE2YmU3MWQwYTcxZTY2ZmJlYmE=").timeout(10000).get();
        // 寻找url返回 HTML 里的 script 这里有 我们需要的 gtk 和 token
        Elements elements = document.getElementsByTag("script");
        String jscode ="var window={};try{";
        for (Element element : elements) {
            String data = element.data();
            // 通过查看html 发现token很gtk 存在两个 script里 将踏面取出
            if (data.startsWith("window") || data.startsWith("\nwindow") ) {
                jscode += data + ";";
            }
        }
        // 多拼了个;去除
        jscode = jscode.substring(0, jscode.length() -1);
        // 注意去除此段代码 防止js 运行报错
        jscode = jscode.replace("window.top.location.href = 'https://fanyi.baidu.com/';", "");
        jscode += "}catch(e){}";
        // 执行Js
        ScriptEngine engine = new ScriptEngineManager().getEngineByName("js");
        engine.eval(jscode);
        Map window = new HashMap();
        if (engine instanceof Invocable) {
            window = (Map) engine.get("window");
        }
        // 获取token和gtk
        String token = (String) ((Map) window.get("common")).get("token");
        String gtk = (String) window.get("gtk");
        String baiduUrl = "https://fanyi.baidu.com/v2transapi";

        // 获取sign
        String sign = token(q, gtk);

        Map<String, String> params = new HashMap<String, String>();
        params.put("from", from);
        params.put("to", to);
        params.put("query", q);
        params.put("transtype", "translang");
        params.put("simple_means_flag", "3");
        params.put("sign", sign);
        params.put("token", token);
        params.put("domain", "common");
        CloseableHttpClient httpClient = HttpClients.createDefault();
        HttpPost request = new HttpPost(baiduUrl);
        List<NameValuePair> paramList = new ArrayList<>();
        Set<String> keySet = params.keySet();
        for (String key : keySet) {
            paramList.add( new  BasicNameValuePair(key, params.get(key)));
        }
        request.setEntity(new UrlEncodedFormEntity(paramList, "UTF-8"));
        request.setHeader("Cookie", "BIDUPSID=1D8BC57A03641735D0F46872B391F36B; PSTM=1621752923; __yjs_duid=1_73eebc74c04c0586214b0074041092b91621754117386; REALTIME_TRANS_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDUSS=U1dmtTdlBaMG1MTHlQZWNkZnVCQm5vOHozVmdHdWcwajlzRjJBZS1-cU5WSEpoRVFBQUFBJCQAAAAAAAAAAAEAAABayrGns7257bntue0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAI3HSmGNx0phbX; BDUSS_BFESS=U1dmtTdlBaMG1MTHlQZWNkZnVCQm5vOHozVmdHdWcwajlzRjJBZS1-cU5WSEpoRVFBQUFBJCQAAAAAAAAAAAEAAABayrGns7257bntue0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAI3HSmGNx0phbX; BAIDUID=D1117626448036AD2AF919EC711025C3:FG=1; APPGUIDE_10_0_2=1; BDSFRCVID=P1IOJeC62w0oC0cHg4qyuRZb6V5Z9OQTH6aoVUmNkwmru95RKuk4EG0PhU8g0K4bGxQJogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tJPJVI82tCD3fP36qRbsMJ8thl63-4oX2TTKWjrJaDvaMKJOy4oTj6j30l3Mql37MI6Qo454yJ_-OMQp5UQj3MvB-fnlXJoUWGFHLU7lWpTpEI3OQft20MkEeMtjBMoaBGvILR7jWhvdhl72y-chQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCeJ6KfJJ4DoIv5b-0_HRT1Mt5Eh-cH-UnLqh_L02OZ0l8Ktt02DIjnhx7JjMFN5J5z5j5h-jTh2UomWIQHDUoXDfTI3TkDQnLfQfnt2aR4KKJx2UKWeIJoj-5n2h_phUJiBMAHBan7W45IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC8lj6t-D5oQepJf-K6a2CJ03JTs26rjDnCr05QzXUI8LNDH-5Oy0bR2an02-4ThVxcPjlDhW6Fg0JO7ttoyQHTL2Jv5a4ohbD5-ynoOjML1Db33L6vMtg0J3q3yLlcoepvoX55c3MkD5tjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKJC3P; BDSFRCVID_BFESS=P1IOJeC62w0oC0cHg4qyuRZb6V5Z9OQTH6aoVUmNkwmru95RKuk4EG0PhU8g0K4bGxQJogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; 
H_BDCLCKID_SF_BFESS=tJPJVI82tCD3fP36qRbsMJ8thl63-4oX2TTKWjrJaDvaMKJOy4oTj6j30l3Mql37MI6Qo454yJ_-OMQp5UQj3MvB-fnlXJoUWGFHLU7lWpTpEI3OQft20MkEeMtjBMoaBGvILR7jWhvdhl72y-chQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCeJ6KfJJ4DoIv5b-0_HRT1Mt5Eh-cH-UnLqh_L02OZ0l8Ktt02DIjnhx7JjMFN5J5z5j5h-jTh2UomWIQHDUoXDfTI3TkDQnLfQfnt2aR4KKJx2UKWeIJoj-5n2h_phUJiBMAHBan7W45IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC8lj6t-D5oQepJf-K6a2CJ03JTs26rjDnCr05QzXUI8LNDH-5Oy0bR2an02-4ThVxcPjlDhW6Fg0JO7ttoyQHTL2Jv5a4ohbD5-ynoOjML1Db33L6vMtg0J3q3yLlcoepvoX55c3MkD5tjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKJC3P; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; delPer=0; PSINO=3; BAIDUID_BFESS=D1117626448036AD2AF919EC711025C3:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1642991662,1643016044,1643016289,1643016293; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1643102040; ab_sr=1.0.1_N2Y5ZGI2ZTE4MGIxMWQxNmU4MDIyMzRhMjg5NTg4MzRkZDk2Njc3MmY0M2Q3NDFkYzY1MzdlZWEzOGE1MmZkOWFjMDU0OGMxZmU4MDFiZmJiZDVhMDIwODRmNWY0YWY3ZTZiZmUzNGQ1MmMyNTQ4YjIyMWUwM2UyZTY2Mzg3YmU=; H_PS_PSSID=35104_31254_35776_34584_35491_35797_35318_26350_35746; BA_HECTOR=2k2kah210hag8004dr1guvj1c0q");
        request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36");

        CloseableHttpResponse response = httpClient.execute(request);
        HttpEntity entity = response.getEntity();

        // 解析JSON
        String result = EntityUtils.toString(entity, "utf-8");
        JSONObject jsonObject = JSONObject.parseObject(result);
        JSONObject object = (JSONObject) jsonObject.get("trans_result");
        JSONArray object1 = (JSONArray) object.get("data");
        JSONObject object2 = (JSONObject)object1.get(0);
        String dst = (String) object2.get("dst");
        EntityUtils.consume(entity);
        response.getEntity().getContent().close();
        response.close();
        return dst;
    }
    /**
     * Computes the request sign by calling the JavaScript function {@code token(value, gtk)}.
     *
     * <p>The script is read from the path in the {@code baidu.fanyi.signjs} system property,
     * defaulting to {@code C:\NC\js.js}. It must define a global function named {@code token}.
     *
     * @param value text being translated
     * @param gtk   gtk value extracted from the landing page
     * @return the string form of whatever the script's {@code token} function returns
     * @throws IllegalStateException when no invocable "js" engine is available, or the script
     *                               cannot be read, evaluated or invoked (the cause is attached);
     *                               an empty sign is never returned because the request it
     *                               feeds would be rejected anyway
     */
    private static String token(String value, String gtk) {
        ScriptEngine engine = new ScriptEngineManager().getEngineByName("js");
        // instanceof is false for null, so this also covers a JVM without any "js" engine.
        if (!(engine instanceof Invocable)) {
            throw new IllegalStateException("No invocable ScriptEngine named \"js\" is available on this JVM");
        }
        String scriptPath = System.getProperty("baidu.fanyi.signjs", "C:\\NC\\js.js");
        // try-with-resources: the reader used to be left open.
        try (FileReader reader = new FileReader(scriptPath)) {
            engine.eval(reader);
            return String.valueOf(((Invocable) engine).invokeFunction("token", value, gtk));
        } catch (IOException | ScriptException | NoSuchMethodException e) {
            throw new IllegalStateException("Unable to compute sign using script " + scriptPath, e);
        }
    }
    /**
     * Demo entry point: translates a fixed sample phrase and prints the result to stdout.
     *
     * @param args unused
     */
    public static void main(String[] args) throws IOException, ScriptException {
        System.out.println(baiduTranslate("你好", "zh", "jp"));
    }
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92

该处通过 url 发起网络请求来获取数据。


总结

第一次写文章

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/352909
推荐阅读
相关标签
  

闽ICP备14008679号