From 1968b38cd52199b0a4e6b747ffceec845ba0b606 Mon Sep 17 00:00:00 2001 From: Osiris Team Date: Tue, 28 Sep 2021 21:08:02 +0200 Subject: [PATCH] 0.0.5 --- README.md | 23 +- pom.xml | 2 +- .../osiris/headlessbrowser/NodeContext.java | 202 +++++++++++++----- .../osiris/headlessbrowser/NodeWindow.java | 17 +- .../utils/AsyncInputStream.java | 2 +- .../headlessbrowser/GraalContextTest.java | 8 +- 6 files changed, 175 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index 5f7c6bb..1d54326 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,27 @@ # Headless-Browser -A new, headless browser written in Java with latest JavaScript support. Powered by the GraalJS-Engine. +A new, headless browser written in Java with latest JavaScript support. Powered by the GraalJS-Engine, +the NodeJs-Engine and Puppeteer. ```java HBrowser hBrowser = new HBrowser(); -HWindow hWindow = hBrowser.openNewWindow().load("https://wikipedia.org"); +try(HWindow hWindow = hBrowser.openWindowAndLoad("https://wikipedia.org")){ + // Do stuff +} ``` ## Features +You can choose from two JavaScript engines: NodeJs and GraalJs. -- [x] Can load pages and partially execute their JavaScript code. Contributions are needed for implementing - all [JS Web-APIs](https://developer.mozilla.org/en-US/docs/Web/API), to achieve full JavaScript - support. [Click here to see a list of already implemented APIs and how to implement one.](how-to-implement-a-js-web-api.md) -- [x] Uses Jsoup for editing HTML directly in Java. -- [x] Uses GraalJS-Engine to execute JavaScript code. -- [ ] Access to all JS-Web APIs from within Java. +The NodeJs engine is currently the default and recommended one. +Puppeteer gets installed automatically along with the latest NodeJs version. +Currently, only Windows x64 is supported (this is going to change very soon)! + +Puppeteer and NodeJs are directly usable in Java. + +Creating a new browser completely in Java is ongoing work. Read more below at "Why contribute?". 
+ +Jsoup is used to work with HTML in Java. ## Installation diff --git a/pom.xml b/pom.xml index 99aba89..400cbb6 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.osiris.headlessbrowser Headless-Browser - 0.0.4 + 0.0.5 jitpack diff --git a/src/main/java/com/osiris/headlessbrowser/NodeContext.java b/src/main/java/com/osiris/headlessbrowser/NodeContext.java index 301a3bd..5913b40 100644 --- a/src/main/java/com/osiris/headlessbrowser/NodeContext.java +++ b/src/main/java/com/osiris/headlessbrowser/NodeContext.java @@ -10,19 +10,19 @@ import java.io.*; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; +import java.nio.file.Files; +import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; public class NodeContext implements AutoCloseable { private final File installationDir = new File(System.getProperty("user.dir") + "/NodeJS-Installation"); - private File executableFile; private final Process process; private final AsyncInputStream processInput; private final OutputStream processOutput; - private final PrintStream out = System.out; + private File executableFile; + private File lastJsCodeExecutionResultFile; public NodeContext() { // Download and install NodeJS into current working directory if no installation found @@ -94,7 +94,7 @@ public NodeContext() { // Tried multiple things without success. 
// Update: Node.exe must be started with this flag to get correct I/O: --interactive processInput = new AsyncInputStream(process.getInputStream()); - processInput.listeners.add(line -> out.println("[Node-JS] " + line)); + processInput.listeners.add(line -> out.println("[" + this + "] " + line)); new AsyncInputStream(process.getErrorStream()).listeners.add(line -> System.err.println("[Node-JS-ERROR] " + line)); processOutput = process.getOutputStream(); out.println(" SUCCESS!"); @@ -117,7 +117,21 @@ public NodeContext() { } try { - executeJavaScript("const executeJavaScriptAndGetResult = null;"); + Thread.sleep(3000); + lastJsCodeExecutionResultFile = new File(executableFile.getParentFile() + "/JavaScriptCodeResult.txt"); + if (!lastJsCodeExecutionResultFile.exists()) lastJsCodeExecutionResultFile.createNewFile(); + String resultFilePath = lastJsCodeExecutionResultFile.getAbsolutePath().replace("\\", "/"); // To avoid issues with indows file path formats + executeJavaScript("var writeResultToJava = function(result) {\n" + + "var fs = require('fs')\n" + + "fs.writeFile('" + resultFilePath + "', result, err => {\n" + // the result var must be defined in the provided jsCode + " if (err) {\n" + + " console.error(err)\n" + + " return\n" + + " }\n" + + " //file written successfully\n" + + "})\n" + + "};" + + "console.log('Context initialised!');"); } catch (Exception e) { e.printStackTrace(); } @@ -129,23 +143,45 @@ public void close() throws Exception { } public NodeContext writeLine(String line) throws IOException { - processOutput.write("\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines - processOutput.flush(); - processOutput.write(".break\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines - processOutput.flush(); - processOutput.write((line + "\n").getBytes(StandardCharsets.UTF_8)); - processOutput.flush(); - 
processOutput.write(".break\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines - processOutput.flush(); - synchronized (out) { - if (line.contains("\n")) { - out.println("START ===>"); - out.println("// Writing line(s) to NodeJS context:"); - out.println(line); - out.println("END <==="); - } else { + if (line.contains("\n")) { + synchronized (out) { + out.println("Writing multiple lines to NodeJS context:"); + out.println("START >>>>>>>>>"); + } + int lineNumber = 1; + String singleLine = null; + try (BufferedReader br = new BufferedReader(new StringReader(line))) { + while ((singleLine = br.readLine()) != null) { + synchronized (out) { + out.println(lineNumber + "| " + singleLine); + lineNumber++; + } + processOutput.write("\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines + processOutput.flush(); + processOutput.write(".break\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines + processOutput.flush(); + processOutput.write((singleLine + "\n").getBytes(StandardCharsets.UTF_8)); + processOutput.flush(); + processOutput.write(".break\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines + processOutput.flush(); + + } + } + synchronized (out) { + out.println("END <<<<<<<<<<<"); + } + } else { + synchronized (out) { out.println("Writing line to NodeJS context: " + line); } + processOutput.write("\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines + processOutput.flush(); + processOutput.write(".break\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines + processOutput.flush(); + processOutput.write((line + "\n").getBytes(StandardCharsets.UTF_8)); + processOutput.flush(); + 
processOutput.write(".break\n".getBytes(StandardCharsets.UTF_8)); // To ensure that multi-lined code from before doesn't affect the next lines + processOutput.flush(); } return this; } @@ -154,8 +190,50 @@ public NodeContext writeLine(String line) throws IOException { * Executes JavaScript code from the provided {@link String} in the
* current {@link NodeContext}. */ - public NodeContext executeJavaScript(String jsCode) throws IOException { - writeLine(jsCode); + public synchronized NodeContext executeJavaScript(String jsCode) { + try { + if (jsCode.contains("\n")) { + synchronized (out) { + out.println("Executing following JS-Code: "); + out.println("JS-CODE START >"); + String singleLine = null; + try (BufferedReader br = new BufferedReader(new StringReader(jsCode))) { + while ((singleLine = br.readLine()) != null) { + synchronized (out) { + out.println(singleLine); + } + } + } + out.println("JS-CODE END <"); + } + } else { + synchronized (out) { + out.println("Executing following JS-Code: " + jsCode); + } + } + + AtomicBoolean wasExecuted = new AtomicBoolean(); + Consumer listener = line -> wasExecuted.set(true); + processInput.listeners.add(listener); + + // Writing stuff directly to the process output/NodeJs REPL console somehow is very error-prone. + // That's why instead we create a temp file with the js code in it and load it using the .load command. + File tmpJs = new File(executableFile.getParentFile() + "/temp" + new Random().nextInt() + ".js"); + if (!tmpJs.exists()) tmpJs.createNewFile(); + Files.write(tmpJs.toPath(), jsCode.getBytes(StandardCharsets.UTF_8)); + executeJavaScript(tmpJs); + + // Wait until we receive a response, like undefined + while (!wasExecuted.get()) { + Thread.sleep(100); + } + + processInput.listeners.remove(listener); + tmpJs.delete(); + } catch (Exception e) { + System.err.println("Error during JavaScript execution! Details: "); + throw new RuntimeException(e); + } return this; } @@ -164,40 +242,29 @@ public NodeContext executeJavaScript(String jsCode) throws IOException { *
      *     var result = InsertYourFunctionsResultHere;
      * 
+ * That result will get returned to this Java method. */ - public String executeJavaScriptAndGetResult(String jsCode) throws IOException { - File resultFile = new File(executableFile.getParentFile() + "/executeJavaScriptAndGetResult.txt"); - if (!resultFile.exists()) resultFile.createNewFile(); - String resultFilePath = resultFile.getAbsolutePath(); - - executeJavaScript("executeJavaScriptAndGetResult = function() {\n" + - jsCode + "\n" + - "var fs = require('fs')\n" + - "fs.writeFile('" + resultFilePath + "', result, err => {\n" + // the result var must be defined in the provided jsCode - " if (err) {\n" + - " console.error(err)\n" + - " return\n" + - " }\n" + - " //file written successfully\n" + - "})\n" + - "};" + - "executeJavaScriptAndGetResult();" + - "executeJavaScriptAndGetResult = null;"); - - StringBuilder result = new StringBuilder(); - String line = null; - try (BufferedReader bufferedReader = new BufferedReader(new FileReader(resultFile))) { - while ((line = bufferedReader.readLine()) != null) { - result.append(line + "\n"); + public String executeJavaScriptAndGetResult(String jsCode) { + try { + executeJavaScript(jsCode); + executeJavaScript("writeResultToJava(result);\n"); + + StringBuilder result = new StringBuilder(); + String line = null; + try (BufferedReader bufferedReader = new BufferedReader(new FileReader(lastJsCodeExecutionResultFile))) { + while ((line = bufferedReader.readLine()) != null) { + result.append(line + "\n"); + } } - } - // Clear the files content because we already got what we need - try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(resultFile))) { - bufferedWriter.write(""); + // Clear the files content because we already got what we need + try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(lastJsCodeExecutionResultFile))) { + bufferedWriter.write(""); + } + return result.toString(); + } catch (Exception e) { + throw new RuntimeException(e); } - - return result.toString(); } /** @@ 
-205,7 +272,7 @@ public String executeJavaScriptAndGetResult(String jsCode) throws IOException { * current {@link NodeContext}. */ public NodeContext executeJavaScript(File jsFile) throws IOException { - writeLine(jsFile.getAbsolutePath()); + writeLine(".load " + jsFile.getAbsolutePath()); return this; } @@ -248,4 +315,27 @@ public Process executeNpmWithArgs(String... args) throws IOException, Interrupte return process; } + public File getInstallationDir() { + return installationDir; + } + + public File getExecutableFile() { + return executableFile; + } + + public Process getProcess() { + return process; + } + + public AsyncInputStream getProcessInput() { + return processInput; + } + + public OutputStream getProcessOutput() { + return processOutput; + } + + public PrintStream getOut() { + return out; + } } diff --git a/src/main/java/com/osiris/headlessbrowser/NodeWindow.java b/src/main/java/com/osiris/headlessbrowser/NodeWindow.java index b4aff81..0cdfbee 100644 --- a/src/main/java/com/osiris/headlessbrowser/NodeWindow.java +++ b/src/main/java/com/osiris/headlessbrowser/NodeWindow.java @@ -19,19 +19,13 @@ public class NodeWindow implements AutoCloseable { private boolean enableJavaScript; private Map customHeaders; - public static void main(String[] args) throws IOException { - //new HBrowser().openWindow().executeJS("const hello = 'hi!';").executeJS("console.log(hello);"); - System.out.println(new HBrowser().openWindow().load("example.com").getDocument().outerHtml()); - } - public NodeWindow(HBrowser parentBrowser, boolean enableJavaScript, Map customHeaders) { this.parentBrowser = parentBrowser; this.enableJavaScript = enableJavaScript; this.customHeaders = customHeaders; try { jsContext.npmInstall("puppeteer"); - jsContext.executeJavaScript("" + - "const puppeteer = require('puppeteer');" + + jsContext.executeJavaScript("const puppeteer = require('puppeteer');\n" + "const browser = await puppeteer.launch();\n" + "const page = await browser.newPage();\n"); } 
catch (Exception e) { @@ -61,12 +55,17 @@ public NodeWindow load(String url) throws IOException { return this; } - public Document getDocument() throws IOException { + public Document getDocument() { String rawHtml = jsContext.executeJavaScriptAndGetResult("" + - "var result = await page.evaluate(() => document.body.innerHTML);"); + "var result = await page.evaluate(() => document.body.innerHTML);\n"); return Jsoup.parse(rawHtml); } + public String getTitle() { + return jsContext.executeJavaScriptAndGetResult("" + + "var result = await page.title();\n" + + ""); + } /** * Executes the provided JavaScript code in the current context.
diff --git a/src/main/java/com/osiris/headlessbrowser/utils/AsyncInputStream.java b/src/main/java/com/osiris/headlessbrowser/utils/AsyncInputStream.java index 564f147..9d8dd31 100644 --- a/src/main/java/com/osiris/headlessbrowser/utils/AsyncInputStream.java +++ b/src/main/java/com/osiris/headlessbrowser/utils/AsyncInputStream.java @@ -9,9 +9,9 @@ import java.util.function.Consumer; public class AsyncInputStream { - public List> listeners = new CopyOnWriteArrayList<>(); private final InputStream inputStream; private final Thread thread; + public List> listeners = new CopyOnWriteArrayList<>(); public AsyncInputStream(InputStream inputStream) { this.inputStream = inputStream; diff --git a/src/test/java/com/osiris/headlessbrowser/GraalContextTest.java b/src/test/java/com/osiris/headlessbrowser/GraalContextTest.java index 8deaaa2..d1affa6 100644 --- a/src/test/java/com/osiris/headlessbrowser/GraalContextTest.java +++ b/src/test/java/com/osiris/headlessbrowser/GraalContextTest.java @@ -14,10 +14,10 @@ public static void main(String[] args) { .build()) { context.getBindings("js").putMember("javaObj", new MyClass()); boolean valid = context.eval("js", - " javaObj.id == 42" + - " && javaObj.text == '42'" + - " && javaObj.arr[1] == 42" + - " && javaObj.ret42() == 42") + " javaObj.id == 42" + + " && javaObj.text == '42'" + + " && javaObj.arr[1] == 42" + + " && javaObj.ret42() == 42") .asBoolean(); context.eval("js", "javaObj.print('HELLO!!!');"); assert valid == true;