-- JRuby's `open` raises an error on Windows 10, so the download is implemented on the Java side instead. -- The JISAutoDetect encoding cannot handle UTF-8, so the encoding is detected by the code below.
-- Arguments -- URL (variable name: link) -- Timeout (variable name: time_limit)
JavaOpen.java
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
 * Downloads the content behind a URL and decodes it to a {@link String},
 * guessing the character encoding from a short list of candidates
 * (UTF-8 first, then several Japanese encodings).
 */
public class JavaOpen{
    /**
     * Fetches {@code link} and returns its body decoded by {@link #bytesToHtml(byte[])}.
     *
     * <p>{@code time_limit} is multiplied by 300 for the connect timeout and by 700
     * for the read timeout (both in milliseconds), i.e. one unit of {@code time_limit}
     * is one second split 3:7 between connecting and reading. A value of 0 produces
     * timeouts of 0, which {@link URLConnection} treats as "no timeout".
     *
     * <p>This method is best-effort: any I/O failure or invalid argument (malformed URL,
     * negative timeout, connection or read error) is printed to standard error and
     * reported to the caller as an empty string rather than as an exception.
     *
     * @param link       the URL to fetch
     * @param time_limit overall time budget in seconds
     * @return the decoded body, or {@code ""} if fetching or decoding failed
     */
    public static String open(String link, int time_limit){
        try {
            URL url = new URL(link);
            URLConnection con = url.openConnection();
            con.setConnectTimeout(scaleTimeout(time_limit, 300));
            con.setReadTimeout(scaleTimeout(time_limit, 700));
            try (InputStream is = con.getInputStream()) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                byte[] byteChunk = new byte[8192];
                int n;
                // read() signals end of stream with -1.
                while ((n = is.read(byteChunk)) != -1) {
                    baos.write(byteChunk, 0, n);
                }
                return bytesToHtml(baos.toByteArray());
            }
        } catch (IOException | RuntimeException e) {
            // Deliberately broad: callers rely on "" instead of an exception.
            // Errors (e.g. OutOfMemoryError) are intentionally NOT caught here.
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Multiplies {@code seconds} by {@code millisPerSecond} without int overflow,
     * clamping the result into the {@code int} range.
     */
    private static int scaleTimeout(int seconds, int millisPerSecond) {
        long millis = (long) seconds * millisPerSecond;
        return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, millis));
    }

    /**
     * Decodes {@code src} with the first candidate encoding that round-trips:
     * the bytes are decoded, re-encoded with the same charset, and accepted only
     * if the result equals {@code src} byte for byte.
     *
     * <p>Candidates that are not available in the running JVM are skipped, so a
     * missing optional charset does not prevent the remaining ones from being tried.
     *
     * @param src the raw bytes; {@code null} is treated like an empty array
     * @return the decoded text, or {@code ""} if {@code src} is empty or no
     *         candidate encoding round-trips
     * @throws UnsupportedEncodingException never thrown by this implementation;
     *         kept in the signature so existing callers continue to compile
     */
    public static String bytesToHtml(byte[] src) throws UnsupportedEncodingException {
        if (src == null || src.length == 0) {
            return "";
        }
        // Order matters: the first encoding that round-trips wins.
        String[] char_codes = { "UTF8","SJIS","EUC_JP","EUC_JP_LINUX","EUC_JP_Solaris" };
        for (String cc: char_codes){
            if (!Charset.isSupported(cc)) {
                continue;
            }
            Charset charset = Charset.forName(cc);
            // Malformed input is decoded to replacement characters, which do not
            // re-encode to the original bytes, so the comparison below rejects it.
            String s_tmp = new String(src, charset);
            byte[] b_tmp = s_tmp.getBytes(charset);
            if (Arrays.equals(src, b_tmp)) {
                return s_tmp;
            }
        }
        return "";
    }
}
-- `String[] char_codes = {"UTF8", "SJIS", "EUC_JP", "EUC_JP_LINUX", "EUC_JP_Solaris"};` lists the character encodings that the pages you access are likely to use, so feel free to adjust it.
-- I would have liked to come up with a better name for the variable time_limit ...
-- I split the time limit between setConnectTimeout and setReadTimeout at a ratio of 3:7, but what is the usual split?
-- I wanted to understand the reason for reading 8192 bytes at a time, but the desire to just get it working won out.
-- The try-with-resources statement (Oracle) -- Simple character encoding detection in Java (Qiita) -- Supported encodings (Oracle)
Recommended Posts