Tuesday, April 15, 2008

Gun Zip Oracle Data using Java Stored procedure

Zip/ Unzip Oracle data

GZIP - Compress / decompress oracle data package

Description: Implements pl/sql pack and unpack methods using JDK java.util.zip package for both RAW and BLOB Oracle types

Methods:


FUNCTION pack(buf IN raw) RETURN raw;
FUNCTION unpack(buf IN raw) RETURN raw;
PROCEDURE packBlob(src IN blob, dst IN OUT blob);
PROCEDURE unpackBlob(src IN blob, dst IN OUT blob);


Installation steps:


Create java class to implement gzip methods:
CREATE OR REPLACE JAVA SOURCE NAMED "GZIPImpl"
AS
import java.io.*;
import java.util.zip.*;
import java.sql.*;
import oracle.sql.*;

public class GZIPImpl
{
public static byte[] packRaw(byte[] b) {
ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
try {
GZIPOutputStream gzip = new GZIPOutputStream(outBuffer);
gzip.write(b);
gzip.close();
}
catch (IOException e) {
System.err.println(e);
}
return outBuffer.toByteArray();
}

public static byte[] unpackRaw(byte[] b) {
ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
ByteArrayInputStream inBuffer = new ByteArrayInputStream(b);
try {
GZIPInputStream gzip = new GZIPInputStream(inBuffer);
byte[] tmpBuffer = new byte[256];
int n;
while ((n = gzip.read(tmpBuffer)) >= 0)
outBuffer.write(tmpBuffer, 0, n);
}
catch (IOException e) {
System.err.println(e);
}
return outBuffer.toByteArray();
}

public static void packBlob(oracle.sql.BLOB srcBlob, oracle.sql.BLOB dstBlob[]) {
try {
OutputStream outBuffer = dstBlob[0].getBinaryOutputStream();
InputStream inBuffer = srcBlob.getBinaryStream();
GZIPOutputStream gzip = new GZIPOutputStream(outBuffer);
byte[] tmpBuffer = new byte[256];
int n;
while ((n = inBuffer.read(tmpBuffer)) >= 0)
gzip.write(tmpBuffer, 0, n);
gzip.close();
}
catch (SQLException e) {
System.err.println(e);
}
catch (IOException e) {
System.err.println(e);
}
}

public static void unpackBlob(oracle.sql.BLOB srcBlob, oracle.sql.BLOB dstBlob[]) {
try {
OutputStream outBuffer = dstBlob[0].getBinaryOutputStream();
InputStream inBuffer = srcBlob.getBinaryStream();
GZIPInputStream gzip = new GZIPInputStream(inBuffer);
byte[] tmpBuffer = new byte[256];
int n;
while ((n = gzip.read(tmpBuffer)) >= 0)
outBuffer.write(tmpBuffer, 0, n);
outBuffer.close();
}
catch (SQLException e) {
System.err.println(e);
}
catch (IOException e) {
System.err.println(e);
}
}

};
/

Compile java class and check for errors:
alter java source "GZIPImpl" compile
/
show errors

Create a wrapper PL/SQL package:
CREATE PACKAGE GZIP AS
FUNCTION pack(buf IN raw) RETURN raw;
FUNCTION unpack(buf IN raw) RETURN raw;
PROCEDURE packBlob(src IN blob, dst IN OUT blob);
PROCEDURE unpackBlob(src IN blob, dst IN OUT blob);
END;
/

CREATE PACKAGE BODY GZIP AS
FUNCTION pack(buf IN raw) RETURN raw
AS LANGUAGE JAVA
NAME 'GZIPImpl.packRaw(byte[]) return byte[]';

FUNCTION unpack(buf IN raw) RETURN raw
AS LANGUAGE JAVA
NAME 'GZIPImpl.unpackRaw(byte[]) return byte[]';

PROCEDURE packBlob(src IN blob, dst IN OUT blob)
AS LANGUAGE JAVA
NAME 'GZIPImpl.packBlob(oracle.sql.BLOB, oracle.sql.BLOB[])';

PROCEDURE unpackBlob(src IN blob, dst IN OUT blob)
AS LANGUAGE JAVA
NAME 'GZIPImpl.unpackBlob(oracle.sql.BLOB, oracle.sql.BLOB[])';
END;
/

Test GZIP package:
DECLARE
src varchar2(32767);
srcRaw raw(32767);
zip raw(32767);
unzip varchar2(32767);
tmp varchar2(250);
tmpRaw raw(250);
srcBlob BLOB;
zipBlob BLOB;
unzipBlob BLOB;
read_amount binary_integer;
BEGIN
src := 'Many sources of information contain redundant data or data that adds '||
'little to the stored information. This results in tremendous amounts '||
'of data being transferred between client and server applications or '||
'computers in general. The obvious solution to the problems of data '||
'storage and information transfer is to install additional storage '||
'devices and expand existing communication facilities. To do so, however, '||
'requires an increase in an organization''s operating costs. One method '||
'to alleviate a portion of data storage and information transfer is '||
'through the representation of data by more efficient code. This article '||
'shows how to compress and decompress data, efficiently and conveniently, '||
'from within your ORACLE applications using the GZIP package.';

DBMS_OUTPUT.put_line('1. RAW TEST');
DBMS_OUTPUT.put_line('===========');

DBMS_OUTPUT.put_line('1.1 SOURCE TEXT:');
DBMS_OUTPUT.put_line(' Cut = '||substr(src, 1, 60)||' ...');
DBMS_OUTPUT.put_line(' Length = '||length(src));

DBMS_OUTPUT.put_line('1.2 COMPRESSED RAW:');
srcRaw := UTL_RAW.cast_to_raw(src); -- Don't forget explicit convertion!
zip := GZIP.pack(srcRaw);
tmp := '';
for i in 1 .. 15 loop
tmp := tmp||'0x'||UTL_RAW.substr(zip, i, 1)||' ';
end loop;
DBMS_OUTPUT.put_line(' Cut = '||tmp||' ...');
DBMS_OUTPUT.put_line(' Length = '||UTL_RAW.length(zip));

DBMS_OUTPUT.put_line('1.3 DECOMPRESSED TEXT:');
unzip := UTL_RAW.cast_to_varchar2(GZIP.unpack(zip)); -- Decompress and convert to varchar2
DBMS_OUTPUT.put_line(' Cut = '||substr(unzip, 1, 60)||' ...');
DBMS_OUTPUT.put_line(' Length = '||length(unzip));

DBMS_OUTPUT.put_line('2. BLOB TEST');
DBMS_OUTPUT.put_line('============');

-- Prepare temporary Blobs
DBMS_LOB.CreateTemporary(srcBlob, TRUE);
DBMS_LOB.CreateTemporary(zipBlob, TRUE);
DBMS_LOB.CreateTemporary(unzipBlob, TRUE);

DBMS_OUTPUT.put_line('2.1 SOURCE BLOB:');
srcRaw := UTL_RAW.cast_to_raw(src); -- Don't forget explicit convertion!
DBMS_LOB.write(srcBlob, UTL_RAW.length(srcRaw), 1, srcRaw); -- Fill source blob
-- Print results
read_amount := 60;
DBMS_LOB.read(srcBlob, read_amount, 1, tmpRaw);
DBMS_OUTPUT.put_line(' Cut = '||UTL_RAW.cast_to_varchar2(tmpRaw)||' ...');
DBMS_OUTPUT.put_line(' Length = '||to_char(DBMS_LOB.GetLength(srcBlob)));

DBMS_OUTPUT.put_line('2.2 COMPRESSED BLOB:');
GZIP.packBlob(srcBlob, zipBlob); -- Compress srcBlob into zipBlob
-- Print results
read_amount := 60;
DBMS_LOB.read(zipBlob, read_amount, 1, tmpRaw);
tmp := '';
for i in 1 .. 15 loop
tmp := tmp||'0x'||UTL_RAW.substr(tmpRaw, i, 1)||' ';
end loop;
DBMS_OUTPUT.put_line(' Cut = '||tmp||' ...');
DBMS_OUTPUT.put_line(' Length = '||to_char(DBMS_LOB.GetLength(zipBlob)));

DBMS_OUTPUT.put_line('2.3 DECOMPRESSED BLOB:');
GZIP.unpackBlob(zipBlob, unzipBlob); -- Decompress zipBlob into unzipBlob
-- Print results
read_amount := 60;
DBMS_LOB.read(unzipBlob, read_amount, 1, tmpRaw);
DBMS_OUTPUT.put_line(' Cut = '||UTL_RAW.cast_to_varchar2(tmpRaw)||' ...');
DBMS_OUTPUT.put_line(' Length = '||to_char(DBMS_LOB.GetLength(unzipBlob)));

-- Release temporary blobs
DBMS_LOB.FreeTemporary(srcBlob);
DBMS_LOB.FreeTemporary(zipBlob);
DBMS_LOB.FreeTemporary(unzipBlob);
END;
/

Code above should produce next results:

1. RAW TEST
===========
1.1 SOURCE TEXT:
Cut = Many sources of information contain redundant data or data t ...
Length = 754
1.2 COMPRESSED RAW:
Cut = 0x1F 0x8B 0x08 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x95 0x92 0xCB 0x8E 0x14 ...
Length = 408
1.3 DECOMPRESSED TEXT:
Cut = Many sources of information contain redundant data or data t ...
Length = 754
2. BLOB TEST
============
2.1 SOURCE BLOB:
Cut = Many sources of information contain redundant data or data t ...
Length = 754
2.2 COMPRESSED BLOB:
Cut = 0x1F 0x8B 0x08 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x95 0x92 0xCB 0x8E 0x14 ...
Length = 408
2.3 DECOMPRESSED BLOB:
Cut = Many sources of information contain redundant data or data t ...
Length = 754


Examples


Compress/decompress on fly stored data using DML (less when 4000 bytes in size):

insert into my_table (compressed_text) values (GZIP.pack(UTL_RAW.cast_to_raw('1234567890')));
select UTL_RAW.cast_to_varchar2(GZIP.unpack(compressed_text)) from my_table;

Compress/decompress BLOB stored data (size <= 4Gb):

DECLARE
SrcBlobLocator BLOB;
DstBlobLocator BLOB;
BEGIN
select blob_data into SrcBlobLocator from src_table where id=1;
select blob_data into DstBlobLocator from dst_table where id=1;
GZIP.packBlob(SrcBlobLocator, DstBlobLocator);
commit;
END;

No comments: