名扬数据:Java中用内存映射处理大文件

在处理大文件时，如果使用普通的 FileInputStream、FileOutputStream 或 RandomAccessFile 进行频繁的读写操作，进程会因为频繁访问外存而变慢。下面是一个对比实验。


    package test;  

     

    import java.io.BufferedInputStream;  

    import java.io.FileInputStream;  

    import java.io.FileNotFoundException;  

    import java.io.IOException;  

    import java.io.RandomAccessFile;  

    import java.nio.MappedByteBuffer;  

    import java.nio.channels.FileChannel;  

     

    public class Test {  

     

          

        public static void main(String[] args) {  

            try {  

                FileInputStream fis=new FileInputStream("/home/tobacco/test/res.txt");  

                int sum=0;  

                int n;  

                long t1=System.currentTimeMillis();  

                try {  

                    while((n=fis.read())>=0){  

                        sum+=n;  

                    }  

                } catch (IOException e) {  

                    // TODO Auto-generated catch block  

                    e.printStackTrace();  

                }  

                long t=System.currentTimeMillis()-t1;  

                System.out.println("sum:"+sum+"  time:"+t);  

            } catch (FileNotFoundException e) {  

                // TODO Auto-generated catch block  

                e.printStackTrace();  

            }  

              

            try {  

                FileInputStream fis=new FileInputStream("/home/tobacco/test/res.txt");  

                BufferedInputStream bis=new BufferedInputStream(fis);  

                int sum=0;  

                int n;  

                long t1=System.currentTimeMillis();  

                try {  

                    while((n=bis.read())>=0){  

                        sum+=n;  

                    }  

                } catch (IOException e) {  

                    // TODO Auto-generated catch block  

                    e.printStackTrace();  

                }  

                long t=System.currentTimeMillis()-t1;  

                System.out.println("sum:"+sum+"  time:"+t);  

            } catch (FileNotFoundException e) {  

                // TODO Auto-generated catch block  

                e.printStackTrace();  

            }  

              

            MappedByteBuffer buffer=null;  

            try {  

                buffer=new RandomAccessFile("/home/tobacco/test/res.txt","rw").getChannel().map(FileChannel.MapMode.READ_WRITE, 0, 1253244);  

                int sum=0;  

                int n;  

                long t1=System.currentTimeMillis();  

                for(int i=0;i<1253244;i++){  

                    n=0x000000ff&buffer.get(i);  

                    sum+=n;  

                }  

                long t=System.currentTimeMillis()-t1;  

                System.out.println("sum:"+sum+"  time:"+t);  

            } catch (FileNotFoundException e) {  

                // TODO Auto-generated catch block  

                e.printStackTrace();  

            } catch (IOException e) {  

                // TODO Auto-generated catch block  

                e.printStackTrace();  

            }  

     

        }  

     

    }  


测试文件为一个大小为1253244字节的文件。测试结果:


    sum:220152087 time:1464  

    sum:220152087 time:72  

    sum:220152087 time:25 


三种方式得到的 sum 完全一致，说明读数据无误。下面删去其中的数据处理部分（求和），再测一次。


    package test;  

     

    import java.io.BufferedInputStream;  

    import java.io.FileInputStream;  

    import java.io.FileNotFoundException;  

    import java.io.IOException;  

    import java.io.RandomAccessFile;  

    import java.nio.MappedByteBuffer;  

    import java.nio.channels.FileChannel;  

     

    public class Test {  

     

          

        public static void main(String[] args) {  

            try {  

                FileInputStream fis=new FileInputStream("/home/tobacco/test/res.txt");  

                int sum=0;  

                int n;  

                long t1=System.currentTimeMillis();  

                try {  

                    while((n=fis.read())>=0){  

                        //sum+=n;  

                    }  

                } catch (IOException e) {  

                    // TODO Auto-generated catch block  

                    e.printStackTrace();  

                }  

                long t=System.currentTimeMillis()-t1;  

                System.out.println("sum:"+sum+"  time:"+t);  

            } catch (FileNotFoundException e) {  

                // TODO Auto-generated catch block  

                e.printStackTrace();  

            }  

              

            try {  

                FileInputStream fis=new FileInputStream("/home/tobacco/test/res.txt");  

                BufferedInputStream bis=new BufferedInputStream(fis);  

                int sum=0;  

                int n;  

                long t1=System.currentTimeMillis();  

                try {  

                    while((n=bis.read())>=0){  

                        //sum+=n;  

                    }  

                } catch (IOException e) {  

                    // TODO Auto-generated catch block  

                    e.printStackTrace();  

                }  

                long t=System.currentTimeMillis()-t1;  

                System.out.println("sum:"+sum+"  time:"+t);  

            } catch (FileNotFoundException e) {  

                // TODO Auto-generated catch block  

                e.printStackTrace();  

            }  

              

            MappedByteBuffer buffer=null;  

            try {  

                buffer=new RandomAccessFile("/home/tobacco/test/res.txt","rw").getChannel().map(FileChannel.MapMode.READ_WRITE, 0, 1253244);  

                int sum=0;  

                int n;  

                long t1=System.currentTimeMillis();  

                for(int i=0;i<1253244;i++){  

                    //n=0x000000ff&buffer.get(i);  

                    //sum+=n;  

                }  

                long t=System.currentTimeMillis()-t1;  

                System.out.println("sum:"+sum+"  time:"+t);  

            } catch (FileNotFoundException e) {  

                // TODO Auto-generated catch block  

                e.printStackTrace();  

            } catch (IOException e) {  

                // TODO Auto-generated catch block  

                e.printStackTrace();  

            }  

     

        }  

     

    } 


测试结果:


    sum:0 time:1458  

    sum:0 time:67  

    sum:0 time:8 


由此可见,将文件部分或者全部映射到内存后进行读写,速度将提高很多。


这是因为内存映射文件首先将外存上的文件映射到内存中的一块连续区域，程序可以把它当成一个字节数组来处理，读写操作直接作用于内存；对映射区域所做的修改随后由操作系统写回外存文件。这就省去了中间频繁读写外存的开销，大大缩短了读写时间。