Tech Kraft

Documentum, AWS, Java, Ruby on Rails, Linux, Windows, App Servers


Java: How to extract lines matching a pattern from a text file

Here’s some code I wrote in Java to extract file paths from a log file:

import java.io.*;
 
 /**
 * Created by amitabh on 12/14/15.
 */
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 /**
  * Scans every regular file in a folder and, for each one, writes a companion
  * {@code <name>.log} file containing the Documentum content path found on each line.
  *
  * <p>At most one path (the first match) is extracted per input line; lines without a
  * matching path produce no output.
  */
 public class ReadFile {

     /** Folder scanned when no folder is supplied on the command line. */
     private static final String DEFAULT_FOLDER_PATH = "U:\\Upgrade_DMFileReport_Compare\\logs";

     /**
      * Matches a path under {@code /opt/documentum/data/} that ends in a known document
      * extension. Compiled once because it is applied to every line of every file.
      *
      * <p>The dot before the extension is escaped so that only a literal {@code .} is
      * accepted, and the optional trailing {@code x} ({@code docx?} etc.) keeps
      * {@code .docx}/{@code .xlsx}/{@code .pptx} from being cut short to their
      * three-letter forms.
      */
     private static final Pattern FILE_PATH_PATTERN = Pattern.compile(
             "(/opt/documentum/data/)([a-zA-Z0-9/_]*)(\\.(docx?|xlsx?|pptx?|pdf))");

     /**
      * Processes every regular file in the target folder.
      *
      * @param args optional; {@code args[0]} is the folder to scan. When absent, the
      *             built-in default folder is used.
      * @throws Exception if the folder cannot be listed or a file cannot be read or written
      */
     public static void main(String[] args) throws Exception {
         String folderPath = (args != null && args.length > 0) ? args[0] : DEFAULT_FOLDER_PATH;

         File directory = new File(folderPath);
         File[] fileList = directory.listFiles();
         // listFiles() returns null when the path is not a directory or cannot be read.
         if (fileList == null) {
             throw new IOException("Cannot list files in folder: " + directory.getAbsolutePath());
         }

         for (File file : fileList) {
             // Subdirectories cannot be opened as text files; skip them.
             if (!file.isFile()) {
                 continue;
             }
             // getPath() joins folder and name with the platform's own separator.
             String sourceFileName = file.getPath();
             String destinationFileName = sourceFileName + ".log";
             System.out.println(sourceFileName);
             copyFile(sourceFileName, destinationFileName);
         }
     }

     /**
      * Writes the path extracted from each line of the source file to the destination
      * file (UTF-8), one path per line. Lines with no matching path are skipped.
      *
      * @param sourceFileName      file to read
      * @param destinationFileName file to create or overwrite
      * @throws Exception if the source cannot be read or the destination cannot be written
      */
     private static void copyFile(String sourceFileName, String destinationFileName) throws Exception {
         System.out.println(destinationFileName);

         // try-with-resources closes both streams even when reading or writing fails.
         try (BufferedReader br = new BufferedReader(new FileReader(sourceFileName));
              PrintWriter pw = new PrintWriter(destinationFileName, "UTF-8")) {
             String line;
             while ((line = br.readLine()) != null) {
                 String match = matchString(line);
                 if (!match.isEmpty()) {
                     pw.println(match);
                 }
             }
             // PrintWriter never throws on write failure; checkError() flushes and reports it.
             if (pw.checkError()) {
                 throw new IOException("Failed writing to " + destinationFileName);
             }
         }
     }

     /**
      * Returns the first Documentum content path found in the line.
      *
      * @param line text to scan
      * @return the matched path, or an empty string when the line contains none
      */
     private static String matchString(String line) {
         Matcher m = FILE_PATH_PATTERN.matcher(line);
         return m.find() ? m.group(0) : "";
     }
 }


Leave a comment

About Me

Senior Software Engineer professional with over 16 years of success with multiple open source technologies and various Content Management platforms and solutions.

Proven technical abilities through numerous projects involving enterprise web application design and development, application installation, configuration and support, and workflow and collaboration system designs.

  • Ability to learn new technologies and platforms quickly and apply them to the task at hand.
  • Excellent analytical skills, and strong communication and collaboration abilities.
  • Technical emphasis including, but not limited to, Java, Ruby on Rails, Documentum, and Alfresco
    in both Linux- and Windows-based environments.

Newsletter