Thursday 28 November 2013

Convert Web Page To Pdf Using Itext

1 comment
Include the following JARs in the classpath:
1. itextpdf-5.4.2.jar
2. jdom-1.1.jar
3. tagsoup-1.2.jar
4. xmlworker-5.4.1.jar
Run WebPageToPdf.java
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
 
/**
 * Downloads a web page and converts it to a PDF in three steps:
 * raw HTML -> well-formed XML (via TagSoup + JDOM) -> PDF (via iText XML Worker).
 *
 * Two temporary files are written next to the PDF and removed again when the
 * run finishes, whether or not the conversion succeeded.
 */
public class WebPageToPdf{

 /** Page that is fetched and converted. */
 private static final String SOURCE_URL = "http://www.tutorialspoint.com/sql/sql-rdbms-concepts.htm";
 /** Temporary copy of the downloaded HTML. */
 private static final String HTML_TEMP_PATH = "C://temp_html.html";
 /** Temporary well-formed XML produced from the HTML. */
 private static final String XML_TEMP_PATH = "C://temp_xml.xml";
 /** Final PDF output. */
 private static final String PDF_PATH = "C://pdf.pdf";

 /**
  * Runs the download / HTML-to-XML / XML-to-PDF pipeline.
  *
  * @param args unused
  * @throws JDOMException if the downloaded HTML cannot be parsed into a document
  * @throws DocumentException if iText fails to build the PDF
  */
 public static void main(String[] args) throws JDOMException, DocumentException {

  File htmlTempFile = new File(HTML_TEMP_PATH);
  File xmlTempFile = new File(XML_TEMP_PATH);

  try {
   downloadHtml(SOURCE_URL, htmlTempFile);
   System.out.println("Html Creation Done");

   convertHtmlToXml(htmlTempFile, xmlTempFile);
   System.out.println("XML Creation Done");

   convertXmlToPdf(xmlTempFile, PDF_PATH);
   System.out.println( "PDF Created Successfully" );

  } catch (MalformedURLException e) {
   e.printStackTrace();
  } catch (IOException e) {
   e.printStackTrace();
  } finally {
   // Every stream on these files has been closed by now, so deletion can
   // succeed even on Windows, where open files cannot be removed.
   boolean htmlGone = deleteIfPresent(htmlTempFile);
   boolean xmlGone = deleteIfPresent(xmlTempFile);
   if (htmlGone && xmlGone) {
    System.out.println("Both Files Deleted Successfully");
   } else {
    System.err.println("Could not delete temporary file(s): "
      + (htmlGone ? "" : htmlTempFile + " ")
      + (xmlGone ? "" : xmlTempFile));
   }
  }

 }

 /** Fetches the page at {@code address} and stores it in {@code target}, echoing each line to stdout. */
 private static void downloadHtml(String address, File target) throws IOException {
  URLConnection conn = new URL(address).openConnection();
  BufferedReader reader = null;
  BufferedWriter writer = null;
  try {
   reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
   writer = new BufferedWriter(new FileWriter(target));
   String inputLine;
   while ((inputLine = reader.readLine()) != null) {
    writer.write(inputLine);
    // readLine() strips the terminator; restore it so adjacent lines are
    // not glued together (matters for <pre> text and "//" script comments).
    writer.newLine();
    System.out.println(inputLine);
   }
   // Close explicitly so a failing flush is reported rather than swallowed.
   writer.close();
  } finally {
   closeQuietly(writer);
   closeQuietly(reader);
  }
 }

 /** Parses the (possibly malformed) HTML with TagSoup and writes it back out as well-formed XML. */
 private static void convertHtmlToXml(File htmlFile, File xmlFile) throws JDOMException, IOException {
  SAXBuilder saxBuilder = new SAXBuilder("org.ccil.cowan.tagsoup.Parser", false);
  XMLOutputter outputter = new XMLOutputter();

  org.jdom.Document jdomDocument;
  BufferedReader htmlIn = new BufferedReader(new FileReader(htmlFile));
  try {
   jdomDocument = saxBuilder.build(htmlIn);
  } finally {
   closeQuietly(htmlIn);
  }

  // Echo the converted document to the console, as a progress aid.
  outputter.output(jdomDocument, System.out);
  System.out.flush();

  BufferedWriter xmlOut = new BufferedWriter(new FileWriter(xmlFile));
  try {
   outputter.output(jdomDocument, xmlOut);
   // Closing the outermost writer flushes it and closes the FileWriter beneath.
   xmlOut.close();
  } finally {
   closeQuietly(xmlOut);
  }
 }

 /** Renders the XHTML in {@code xmlFile} into a PDF at {@code pdfPath}. */
 private static void convertXmlToPdf(File xmlFile, String pdfPath) throws DocumentException, IOException {
  FileOutputStream pdfOut = new FileOutputStream(pdfPath);
  try {
   Document document = new Document();
   PdfWriter writer = PdfWriter.getInstance(document, pdfOut);
   document.open();
   FileInputStream xmlIn = new FileInputStream(xmlFile);
   try {
    XMLWorkerHelper.getInstance().parseXHtml(writer, document, xmlIn);
   } finally {
    closeQuietly(xmlIn);
   }
   document.close();
  } finally {
   closeQuietly(pdfOut);
  }
 }

 /** Returns true when {@code file} does not exist after the call. */
 private static boolean deleteIfPresent(File file) {
  return !file.exists() || file.delete();
 }

 /** Closes {@code resource} if non-null, ignoring failures; used only on cleanup paths. */
 private static void closeQuietly(java.io.Closeable resource) {
  if (resource == null) {
   return;
  }
  try {
   resource.close();
  } catch (IOException ignored) {
   // Best-effort cleanup: the primary exception (if any) is more useful.
  }
 }
}

Wednesday 27 November 2013

Convert Webpage into PDF Using Java and PhantomJs

2 comments
Download the demo folder from here: Download Me
Place it in the root of your D:/ drive.
Now run Test.java
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;


/**
 * Generates a PhantomJS script that renders a web page to PDF, writes it to
 * D:/demo/demo.js and launches D:/demo/my.bat, which runs PhantomJS on it.
 */
public class Test {

 /**
  * Writes the PhantomJS script and starts the batch file.
  *
  * @param args unused
  */
 public static void main(String[] args) {
  try {

   //Set URL INFO
   String URL = "http://www.google.com";
   String FILE_NAME = "google_pdf";

   StringBuilder script = new StringBuilder("var page = require('webpage').create();");
   script.append("page.open('"+URL+"', function () {");
   script.append("page.render('temp_file/temp_"+FILE_NAME+".pdf');");
   script.append("phantom.exit();");
   script.append("});");

   // Overwrite the script (append=false) instead of delete-then-append, so a
   // failed delete can never leave two scripts concatenated in the file.
   BufferedWriter bw = new BufferedWriter(new FileWriter("D://demo/demo.js", false));
   try {
    bw.write(script.toString());
   } finally {
    bw.close();
   }

   System.out.println("PDF URL: D:/demo/temp_file/temp_"+FILE_NAME+".pdf");

   // Execute command
   String command = "cmd /c start D:/demo/my.bat";

   Process child = Runtime.getRuntime().exec(command);

   // Stream connected to the child's standard input
   OutputStream out = child.getOutputStream();
   try {
    // Commands must end with CRLF; the original "/r/n" was literal slashes.
    out.write("cd C:/ \r\n".getBytes());
    out.flush();
    out.write("dir \r\n".getBytes());
   } finally {
    out.close();
   }
  } catch (IOException e) {
   // Report the failure instead of silently producing no PDF.
   e.printStackTrace();
  }
 }
}

Put the following commands in my.bat inside the demo folder.
cd\
D:
cd demo
phantomjs demo.js

exit

Thursday 21 November 2013

Extract Web Links Using Java

0 comments
You have to process the response HTML and detect all anchor tags using a regular expression.
See Example:
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Extracts hyperlinks (href value and link text) from an HTML string using
 * regular expressions.
 */
public class LinkExt {

 // Matches a whole anchor element: group 1 = attribute text, group 2 = link text.
 private static final String HTML_A_TAG_PATTERN = "(?i)<a([^>]+)>(.+?)</a>";
 // Matches an href attribute; group 1 = the value, still wrapped in its quotes if it had any.
 private static final String HTML_A_HREF_TAG_PATTERN = 
  "\\s*(?i)href\\s*=\\s*(\"([^\"]*\")|'[^']*'|([^'\">\\s]+))";

 private final Pattern patternTag;
 private final Pattern patternLink;

 /** Compiles the anchor and href patterns once per extractor. */
 public LinkExt() {
  patternTag = Pattern.compile(HTML_A_TAG_PATTERN);
  patternLink = Pattern.compile(HTML_A_HREF_TAG_PATTERN);
 }

 /**
  * Finds every anchor tag in the given HTML and returns its link and text.
  * 
  * @param html
  *            html content to scan; {@code null} is treated as empty
  * @return Vector of links with their link text, empty when none are found
  */
 public Vector<HtmlLink> grabHTMLLinks(final String html) {

  Vector<HtmlLink> result = new Vector<HtmlLink>();
  if (html == null) {
   return result;
  }

  // Matchers are local so the extractor carries no per-call state.
  Matcher matcherTag = patternTag.matcher(html);

  while (matcherTag.find()) {

   String attributes = matcherTag.group(1); // everything between "<a" and ">"
   String linkText = matcherTag.group(2); // link text

   Matcher matcherLink = patternLink.matcher(attributes);

   while (matcherLink.find()) {

    String link = matcherLink.group(1); // href value, possibly quoted
    HtmlLink obj = new HtmlLink();
    obj.setLink(link);
    obj.setLinkText(linkText);

    result.add(obj);

   }

  }

  return result;

 }

 /** A single extracted link: the href value and the text inside the anchor. */
 static class HtmlLink {

  String link;
  String linkText;

  HtmlLink(){}

  @Override
  public String toString() {
   return new StringBuilder("Link : ").append(this.link).append(" :: ")
   .append(" Link Text : ").append(this.linkText).toString();
  }

  public String getLink() {
   return link;
  }

  /** Stores the link with any single or double quote characters removed. */
  public void setLink(String link) {
   this.link = replaceInvalidChar(link);
  }

  public String getLinkText() {
   return linkText;
  }

  public void setLinkText(String linkText) {
   this.linkText = linkText;
  }

  private String replaceInvalidChar(String link){
   link = link.replaceAll("'", "");
   link = link.replaceAll("\"", "");
   return link;
  }

 }

 /**
  * Demo: downloads http://www.google.com, prints the response and every
  * link found in it.
  *
  * @param args unused
  */
 public static void main(String[] args) {
  try
  {
  //HTML RESPONSE 
  URL u = new URL("http://www.google.com");
  URLConnection conn = u.openConnection();
  BufferedReader in = new BufferedReader(
                          new InputStreamReader(
                              conn.getInputStream()));
  StringBuilder buffer = new StringBuilder();
  try {
   String inputLine;
   while ((inputLine = in.readLine()) != null) {
    buffer.append(inputLine);
   }
  } finally {
   // Release the connection even if reading fails part-way.
   in.close();
  }
  System.out.println("HTML RESPONSE:  "+buffer.toString());
  //HTML RESPONSE

  //LINK EXTRACTOR
  LinkExt linkExt = new LinkExt();
  Vector<HtmlLink> links = linkExt.grabHTMLLinks(buffer.toString());
  for (int i = 0; i < links.size(); i++) {
   HtmlLink htmlLinks = links.get(i);
   System.out.println(htmlLinks);
  }
  //LINK EXTRACTOR

  }catch(Exception ex)
  {
   ex.printStackTrace();
  }

 }
}

Monday 4 November 2013

Spring Social

0 comments
Spring Social is another useful module of the Spring Framework. It provides a way to establish a connection between a service provider (such as Facebook, Twitter, LinkedIn, Tumblr, and many more) and your application (the service consumer) on behalf of an existing user of that service provider.

The key features of Spring Social are:
  • A "Connect Framework" that handles the core authorization and connection flow with service providers.
  • A "Connect Controller" that handles the OAuth exchange between a service provider, consumer, and user in a web application environment.
  • A "Signin Controller" that allows users to authenticate with your application by signing in with their Provider accounts, such as their Twitter or Facebook accounts.





Related Posts Plugin for WordPress, Blogger...