Hi All,
I have an invalid XML file which contains Return characters at the end of each line. I need to delete these return characters so the file becomes valid.
Does anybody have any idea how this could be done using RandomAccessFile?
I found joop_eggen's posting in this forum, modified it just a little and wanted to use it, but since the replacement string is "" (empty), it does not do what I need: the matched bytes are overwritten with spaces rather than removed.
The XML file looks like this:
<?xml version="1.0"?>
<EPMSObject>
<EPMSRecord><facilityname>KT0</facilityname><date_time>2007-06-01T00:00:00</date_time><devicetype>RPP</devicetype><devicename>RPP1A1_001.BCMF</devicename><meter>BCMF</meter><ckt_01_current>4.136000000000000e+000</ckt_01_current><ckt_02_current>0.000000000000000e+000</ckt_02_current><ckt_03_current>5.521000000000000e+000</ckt_03_current><ckt_04_current>0.000000000000000e+000</ckt_04_current><ckt_05_current>5.880000000000000e+000</ckt_05_current><ckt_06_current>0.000000000000000e+000</ckt_06_current><ckt_07_current>4.086000000000000e+000</ckt_07_current><ckt_08_current>0.000000000000000e+000</ckt_08_current><ckt_09_current>4.994000000000000e+000</ckt_09_current><ckt_10_current>0.000000000000000e+000</ckt_10_current><ckt_11_current>4.374000000000000e+000</ckt_11_current><ckt_12_current>0.000000000000000e+000</ckt_12_current><ckt_13_current>4.314000000000000e+000</ckt_13_current><ckt_14_current>0.000000000000000e+000</ckt_14_current><ckt_15_current>4.112000000000000e+000</ckt_15_current><ckt_16_current>0.000000000000000e+000</ckt_16_current><ckt_17_current>4.287000000000000e+000</ckt_17_current><ckt_18_current>0.000000000000000e+000</ckt_18_current><ckt_19_current>4.254000000000000e+000</ckt_19_current><ckt_20_current>0.000000000000000e+000</ckt_20_current><ckt_21_current>3.970000000000000e+000</ckt_21_current><ckt_22_current>0.000000000000000e+000</ckt_22_current><ckt_23_current>5.640000000000000e+000</ckt_23_current><ckt_24_current>0.000000000000000e+000</ckt_24_current><ckt_25_current>7.123000000000000e+000</ckt_25_current><ckt_26_current>0.000000000000000e+000</ckt_26_current><ckt_27_current>5.118000000000000e+000</ckt_27_current><ckt_28_current>0.000000000000000e+000</ckt_28_current><ckt_29_current>6.094000000000000e+000</ckt_29_current><ckt_30_current>0.000000000000000e+000</ckt_30_current><ckt_31_current>0.000000000000000e+000</ckt_31_current><ckt_32_current>0.000000000000000e+000</ckt_32_current><ckt_33_current>0.000000000000000e+000</ckt_33_current><ckt_34_curr
ent>0.000000000000000e+000</ckt_
34_current><ckt_35_current>0.000000000000000e+000</ckt_35_current><ckt_36_current>0.000000000000000e+000</ckt_36_current><ckt_37_current>0.000000000000000e+000</ckt_37_current><ckt_38_current>0.000000000000000e+000</ckt_38_current><ckt_39_current>0.000000000000000e+000</ckt_39_current><ckt_40_current>0.000000000000000e+000</ckt_40_current><ckt_41_current>0.000000000000000e+000</ckt_41_current><ckt_42_current>0.000000000000000e+000</ckt_42_current></EPMSRecord>
</EPMSObject>
Here is joop_eggen's code:
import java.io.*;
import java.nio.*;
import java.nio.channels.*;
/**
 * Command-line tool that performs an in-place, same-length byte replacement
 * in one or more files through a memory-mapped buffer.
 *
 * <p>Usage: {@code java Patch sought replacement file...}
 *
 * <p>The file size never changes. Every occurrence of {@code sought} is
 * overwritten with {@code replacement}; if the replacement is shorter, the
 * remaining bytes of the match are filled with spaces, and if it is longer,
 * the excess bytes are ignored. Consequently this tool cannot <em>delete</em>
 * bytes -- an empty replacement turns each match into blanks.
 */
public class Patch {
    /** Byte pattern to look for; set once by {@link #main(String[])}. */
    private static byte[] sought;
    /** Bytes written over each match; set once by {@link #main(String[])}. */
    private static byte[] replacement;

    /**
     * Tells whether the bytes of {@code sought} occur in the buffer starting at {@code pos}.
     *
     * @param bb  mapped file contents
     * @param pos absolute start index; the caller guarantees {@code pos + sought.length}
     *            does not exceed the mapped size
     * @return {@code true} if every byte of {@code sought} matches
     */
    private static boolean matches(MappedByteBuffer bb, int pos) {
        for (int j = 0; j < sought.length; ++j) {
            if (sought[j] != bb.get(pos + j)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Overwrites {@code sought.length} bytes starting at {@code pos} with the
     * replacement, padding with spaces when the replacement is shorter.
     *
     * @param bb  mapped file contents
     * @param pos absolute start index of the match
     */
    private static void replace(MappedByteBuffer bb, int pos) {
        for (int j = 0; j < sought.length; ++j) {
            byte b = (j < replacement.length) ? replacement[j] : (byte) ' ';
            bb.put(pos + j, b);
        }
    }

    /**
     * Scans the first {@code sz} bytes of the buffer, replaces every
     * non-overlapping match and prints the number of replacements to standard output.
     *
     * @param bb mapped file contents
     * @param sz number of bytes to scan
     */
    private static void searchAndReplace(MappedByteBuffer bb, int sz) {
        int replacementsCount = 0;
        for (int pos = 0; pos <= sz - sought.length; ++pos) {
            if (matches(bb, pos)) {
                replace(bb, pos);
                // Jump past the bytes just written so they are not rescanned;
                // the loop's ++pos supplies the final step.
                pos += sought.length - 1;
                ++replacementsCount;
            }
        }
        System.out.println("" + replacementsCount + " replacements done.");
    }

    /**
     * Maps the given file read-write into memory, applies the replacement and
     * forces the changes back to the file.
     *
     * @param f file to patch in place
     * @throws IOException if the file cannot be opened or mapped, or if it is
     *                     larger than {@code Integer.MAX_VALUE} bytes
     */
    private static void patch(File f) throws IOException {
        RandomAccessFile raf = new RandomAccessFile(f, "rw"); // "rws", "rwd"
        try {
            FileChannel fc = raf.getChannel();
            long size = fc.size();
            // The scan uses int indices, so refuse files a plain cast would truncate.
            if (size > Integer.MAX_VALUE) {
                throw new IOException("file too large to patch (" + size + " bytes)");
            }
            int sz = (int) size;
            MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_WRITE, 0, sz);
            searchAndReplace(bb, sz);
            bb.force(); // Write back to file, like "flush()"
        } finally {
            // Closing the file also closes its channel; done in finally so a
            // failure while mapping or scanning does not leak the handle.
            raf.close();
        }
    }

    /**
     * Entry point. With no arguments, a built-in default is used: the platform
     * line separator is replaced by nothing (i.e. blanked out) in a fixed file.
     *
     * @param args {@code sought replacement file...}
     */
    public static void main(String[] args) {
        String E_O_L;
        E_O_L = System.getProperty("line.separator");
        if (args.length == 0) {
            args = new String[] { E_O_L, "", "C:\\GTI\\EPMSRecords.xml" };
        }
        if (args.length < 3) {
            System.err.println("Usage: java Patch sought replacement file...");
            return;
        }
        // NOTE(review): getBytes() uses the platform default charset; confirm
        // that this matches the encoding of the files being patched.
        sought = args[0].getBytes();
        replacement = args[1].getBytes();
        // An empty pattern would match at every position without ever advancing.
        if (sought.length == 0) {
            System.err.println("Usage: sought must not be empty");
            return;
        }
        // Lengths may differ: replace() pads a shorter replacement with blanks.
        for (int i = 2; i < args.length; i++) {
            File f = new File(args[i]);
            try {
                patch(f);
            } catch (IOException x) {
                System.err.println(f + ": " + x);
            }
        }
    }
}
Thank you,
Sinan Topuz