Please help following issue: Not getting response when the document is having special chars(Use any doc with special char(ex: &, $, <, >,.....) TestErrorFour.doc
Error message:
System.FormatException: Invalid length for a Base-64 char array. at
System.Convert.FromBase64String(String s) at
Summarize.Summarizer.AccumulateBroadcast(String filedata, String givenWords) in
c:\DocumentSummarizer\App_Code\Summarizer.cs:line 66
Code:
--------
File 1:
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Properties;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hwpf.*;
import org.apache.poi.hwpf.extractor.*;
import com.lowagie.text.Document;
import com.lowagie.text.pdf.PRTokeniser;
import com.lowagie.text.pdf.PdfReader;
public class DocumentSummarizerClient {
static Properties loadProperties() {
Properties prop = new Properties();
try {
prop.load(DocumentSummarizerClient.class.getClassLoader().getResourceAsStream("vep.properties"));
} catch (Exception ioe) {
ioe.printStackTrace();
}
return prop;
}
public String getSummary(String fileName,String noOfWordsOrPercentage ){
String summaryInputData ="";
String summarizedData="";
String summarizerURL = loadProperties().getProperty("Summarizer.serviceURL");
try {
String fileExtension=fileName.substring(fileName.lastIndexOf(".")+1, fileName.length());
if (fileExtension.equalsIgnoreCase("doc")|| fileExtension.equalsIgnoreCase("txt")|| fileExtension.equalsIgnoreCase("pdf")) {
if (fileExtension.equalsIgnoreCase("txt")) {
BufferedReader bufferedReader = new BufferedReader(
new FileReader(fileName));
String line = null;
while ((line = bufferedReader.readLine()) != null) {
summaryInputData += line;
}
}
if(fileExtension.equalsIgnoreCase("doc")){
POIFSFileSystem fs = null;
fs = new POIFSFileSystem(new FileInputStream(fileName));
HWPFDocument doc = new HWPFDocument(fs);
WordExtractor we = new WordExtractor(doc);
String[] paragraphs = we.getParagraphText();
for( int i=0; i<paragraphs .length; i++ ) {
paragraphs[i] = paragraphs.replaceAll("\\cM?\r?\n","");
summaryInputData+= paragraphs[i];
}
}
if(fileExtension.equalsIgnoreCase("pdf")){
Document document = new Document();
document.open();
PdfReader reader = new PdfReader(fileName);
int pageCount =reader.getNumberOfPages();
for(int i=1;i<=pageCount;i++){
byte[] bytes = reader.getPageContent(i);
PRTokeniser tokenizer = new PRTokeniser(bytes);
StringBuffer buffer = new StringBuffer();
while (tokenizer.nextToken()) {
if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) {
buffer.append(tokenizer.getStringValue());
}
}
summaryInputData += buffer.toString();
}
}
}
else{
System.out.println("This is Invalid document. Presntly we support only text,word and PDF documents ");
}
// String encoded =new String (summaryInputData.getBytes("ISO-8859-1"),"UTF-8");
String encoded=Base64Utils.base64Encode(summaryInputData.getBytes());
// encoded =new String (summaryInputData.getBytes("ISO-8859-1"),"UTF-8");
String parameters= "base64String="+encoded+"&noOfWordsOrPercentage="+noOfWordsOrPercentage;
summarizedData= postRequest(parameters,summarizerURL);
String slength= "<string xmlns=\"http://tempuri.org/\">";
if(summarizedData.contains("</string>")){
summarizedData= summarizedData.substring(summarizedData.indexOf(slength)+slength.length(),summarizedData.indexOf("</string>"));
summarizedData = replaceVal(summarizedData);
//System.out.println("<?xml version=\"1.0\" encoding=\"utf-8\"?><![CDATA["+summarizedData+"]]>");
// System.out.println("Summarized data "+summarizedData);
if(summarizedData.contains("Please enter the percentage")){
summarizedData="Data given cannot be summarized further";
}
}
else{
System.out.println("Data given cannot be summarized further");
summarizedData="";
}
} catch (FileNotFoundException e) {
return("The File is not found \n\n"+e.toString());
} catch (IOException e) {
return("The File is already in use \n\n"+e.toString());
} catch (Exception e) {
return(e.toString());
}
return summarizedData;
}
public static String postRequest(String parameters,String webServiceURL) throws Exception{
Properties systemSettings = System.getProperties();
systemSettings.put("http.proxyHost", loadProperties().getProperty("proxyHost"));
systemSettings.put("http.proxyPort", loadProperties().getProperty("proxyPort"));
System.setProperties(systemSettings);
String responseXML = "";
try {
URL url = new URL(webServiceURL);
URLConnection connection = url.openConnection();
HttpURLConnection httpConn = (HttpURLConnection) connection;
byte[] requestXML = parameters.getBytes();
httpConn.setRequestProperty("Content-Length", String
.valueOf(requestXML.length));
httpConn.setRequestProperty("Content-Type",
"application/x-www-form-urlencoded");
httpConn.setRequestMethod("POST");
httpConn.setDoOutput(true);
httpConn.setDoInput(true);
OutputStream out = httpConn.getOutputStream();
out.write(requestXML, 0, requestXML.length);
out.close();
InputStreamReader isr = new InputStreamReader(httpConn
.getInputStream());
BufferedReader br = new BufferedReader(isr);
String temp;
String tempResponse = "";
while ((temp = br.readLine()) != null)
tempResponse = tempResponse + temp;
responseXML = tempResponse;
br.close();
isr.close();
} catch (java.net.MalformedURLException e) {
System.out
.println("Error in postRequest(): Secure Service Required");
} catch (Exception e) {
System.out.println("Error in postRequest(): " + e.getMessage());
}
return responseXML;
}
public String replaceVal(String value) {
if (value == null) {
value = "";
}
value = value.replace("<", "<");
value = value.replace(">", ">");
value = value.replace("&", "&");
return value;
}
public static void main(String[] args) {
DocumentSummarizerClient testdoc=new DocumentSummarizerClient();
System.out.println("hello");
testdoc.getSummary("C:\\working_folder\\vep\\UnitTestCases\\VEP1.0\\DocumentSummarizerTestData\\TestErrorFour.doc","100%");
}
}
Note: Use any doc with special char(ex: &, $, <, >,.....) TestErrorFour.doc
File 2:
---------
public class Base64Utils {
private static byte[] mBase64EncMap, mBase64DecMap;
/**
* Class initializer. Initializes the Base64 alphabet (specified in RFC-2045).
*/
static {
byte[] base64Map = {
(byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F',
(byte)'G', (byte)'H', (byte)'I', (byte)'J', (byte)'K', (byte)'L',
(byte)'M', (byte)'N', (byte)'O', (byte)'P', (byte)'Q', (byte)'R',
(byte)'S', (byte)'T', (byte)'U', (byte)'V', (byte)'W', (byte)'X',
(byte)'Y', (byte)'Z',
(byte)'a', (byte)'b', (byte)'c', (byte)'d', (byte)'e', (byte)'f',
(byte)'g', (byte)'h', (byte)'i', (byte)'j', (byte)'k', (byte)'l',
(byte)'m', (byte)'n', (byte)'o', (byte)'p', (byte)'q', (byte)'r',
(byte)'s', (byte)'t', (byte)'u', (byte)'v', (byte)'w', (byte)'x',
(byte)'y', (byte)'z',
(byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5',
(byte)'6', (byte)'7', (byte)'8', (byte)'9', (byte)'+', (byte)'/'};
mBase64EncMap = base64Map;
mBase64DecMap = new byte[128];
for (int i=0; i<mBase64EncMap.length; i++)
mBase64DecMap[mBase64EncMap[i]] = (byte) i;
}
/**
* This class isn't meant to be instantiated.
*/
private Base64Utils() {
}
/**
* Encodes the given byte[] using the Base64-encoding,
* as specified in RFC-2045 (Section 6.8).
*
* @param aData the data to be encoded
* @return the Base64-encoded <var>aData</var>
* @exception IllegalArgumentException if NULL or empty array is passed
*/
public static String base64Encode(byte[] aData) {
if ((aData == null) || (aData.length == 0))
throw new IllegalArgumentException("Can not encode NULL or empty byte array.");
byte encodedBuf[] = new byte[((aData.length+2)/3)*4];
// 3-byte to 4-byte conversion
int srcIndex, destIndex;
for (srcIndex=0, destIndex=0; srcIndex < aData.length-2; srcIndex += 3) {
encodedBuf[destIndex++] = mBase64EncMap[(aData[srcIndex] >>> 2) & 077];
encodedBuf[destIndex++] = mBase64EncMap[(aData[srcIndex+1] >>> 4) & 017 |
(aData[srcIndex] << 4) & 077];
encodedBuf[destIndex++] = mBase64EncMap[(aData[srcIndex+2] >>> 6) & 003 |
(aData[srcIndex+1] << 2) & 077];
encodedBuf[destIndex++] = mBase64EncMap[aData[srcIndex+2] & 077];
}
// Convert the last 1 or 2 bytes
if (srcIndex < aData.length) {
encodedBuf[destIndex++] = mBase64EncMap[(aData[srcIndex] >>> 2) & 077];
if (srcIndex < aData.length-1) {
encodedBuf[destIndex++] = mBase64EncMap[(aData[srcIndex+1] >>> 4) & 017 |
(aData[srcIndex] << 4) & 077];
encodedBuf[destIndex++] = mBase64EncMap[(aData[srcIndex+1] << 2) & 077];
}
else {
encodedBuf[destIndex++] = mBase64EncMap[(aData[srcIndex] << 4) & 077];
}
}
// Add padding to the end of encoded data
while (destIndex < encodedBuf.length) {
encodedBuf[destIndex] = (byte) '=';
destIndex++;
}
String result = new String(encodedBuf);
return result;
}
/**
* Decodes the given Base64-encoded data,
* as specified in RFC-2045 (Section 6.8).
*
* @param aData the Base64-encoded aData.
* @return the decoded <var>aData</var>.
* @exception IllegalArgumentException if NULL or empty data is passed
*/
public static byte[] base64Decode(String aData) {
if ((aData == null) || (aData.length() == 0))
throw new IllegalArgumentException("Can not decode NULL or empty string.");
byte[] data = aData.getBytes();
// Skip padding from the end of encoded data
int tail = data.length;
while (data[tail-1] == '=')
tail--;
byte decodedBuf[] = new byte[tail - data.length/4];
// ASCII-printable to 0-63 conversion
for (int i = 0; i < data.length; i++)
data[i] = mBase64DecMap[data[i]];
// 4-byte to 3-byte conversion
int srcIndex, destIndex;
for (srcIndex = 0, destIndex=0; destIndex < decodedBuf.length-2;
srcIndex += 4, destIndex += 3) {
decodedBuf[destIndex] = (byte) ( ((data[srcIndex] << 2) & 255) |
((data[srcIndex+1] >>> 4) & 003) );
decodedBuf[destIndex+1] = (byte) ( ((data[srcIndex+1] << 4) & 255) |
((data[srcIndex+2] >>> 2) & 017) );
decodedBuf[destIndex+2] = (byte) ( ((data[srcIndex+2] << 6) & 255) |
(data[srcIndex+3] & 077) );
}
// Handle last 1 or 2 bytes
if (destIndex < decodedBuf.length)
decodedBuf[destIndex] = (byte) ( ((data[srcIndex] << 2) & 255) |
((data[srcIndex+1] >>> 4) & 003) );
if (++destIndex < decodedBuf.length)
decodedBuf[destIndex] = (byte) ( ((data[srcIndex+1] << 4) & 255) |
((data[srcIndex+2] >>> 2) & 017) );
return decodedBuf;
}
}
issue 2: Exception when passing 2MB .txt file
------------------------------------------------------------------
Steps to reproduce:
Call getSummary() with 2MB .txt file
Actual:
The following exception has occured:
------------------------------------
1. Error in postRequest(): Unexpected end of file from server
java.lang.NullPointerException
Please provide your precious feedback/suggestions.
Thanks in advanceā¦..
Edited by: EJP on 15/03/2011 16:52: added code tags. Please use them. Code is unreadable otherwise.