Intro

Not long ago I posed a challenge for those of us learning rust: https://lemmy.ml/post/12478167.

Basically write an equivalent of git diff --no-index A B … a file differ.

While it’s never too late to attempt it, I figured it’d be a good time to check in to see what anyone thought of it, in part because some people may have forgotten about it and would still like to have a shot, and also because I had a shot and am happy with what I wrote.

Check In

I’ll post where I got up to below (probably as a comment), but before that, does anyone have anything to share on where they got up to … any general thoughts on the challenge and the general idea of these?

My experience

My personal experience was that I’d not kept up with my rust “studies” for a bit and used this as a good “warm up” or “restart” exercise and it worked really well. Obviously learning through doing is a good idea, and the Rust Book is a bit too light, IMO, on good exercises or similar activities. But I found this challenge just difficult enough to make me feel more comfortable with the language.

Future Challenges

Any ideas for future challenges??

My quick thoughts

  • A simple web app like a todo app using axtix_web and diesel and some templating crate.
  • Extend my diffing program to output JSON/HTML and then to diff by characters in a string
  • A markdown parser??
  • maegul (he/they)OPM
    link
    fedilink
    English
    arrow-up
    1
    ·
    6 months ago

    Code

    use std::env;
    use std::fs::read_to_string;
    
    // > Data Types
    
    #[derive(Debug)]
    struct FileLens {
        a: usize,
        b: usize
    }
    impl FileLens {
        fn new(a_lines: &Vec<&str>, b_lines: &Vec<&str>) -> FileLens {
            FileLens{
                a: a_lines.len(),
                b: b_lines.len()
            }
        }
    }
    
    
    #[derive(Debug)]
    struct Cursors {
        a: usize,
        b: usize,
    }
    impl Cursors {
        fn increment_cursor(&self, chunk: Option<&Chunk>) -> Cursors {
            let (a_inc, b_inc) = match chunk {
                Some(chnk) => {
                    match chnk.chunk_type {
                        ChunkType::Addition => (
                            -1 * (i32::try_from(chnk.b_lines.len())
                                .expect("Lines vector too big for i32!")),
                            0,
                        ),
                        ChunkType::Deletion => (
                            0,
                            -1 * (i32::try_from(chnk.a_lines.len())
                                .expect("Lines vector too big for i32!")),
                        ),
                        ChunkType::Match | ChunkType::Modification => (1, 1),
                    }
                }
                None => (1, 1),
            };
    
            let new_a_cursor = (i32::try_from(self.a).expect("opps")) + a_inc;
            let new_b_cursor = (i32::try_from(self.b).expect("opps")) + b_inc;
    
            // negative cursors should be captured here (despite bad error msg)
            Cursors {
                a: usize::try_from(new_a_cursor).expect("oops"),
                b: usize::try_from(new_b_cursor).expect("oops"),
            }
        }
    
        fn clone(&self) -> Cursors {
            Cursors{a: self.a, b: self.b}
        }
    }
    
    #[derive(Debug)]
    enum ChunkType {
        Match,
        Addition,
        Deletion,
        Modification,
    }
    #[derive(Debug)]
    struct Chunk {
        chunk_type: ChunkType,
        a_lines: Vec<String>,
        b_lines: Vec<String>,
        cursor: Cursors,
    }
    #[derive(Debug)]
    #[derive(Clone)]
    enum FileSpec {
        A,
        B,
        AB
    }
    #[derive(Debug)]
    enum CompType {
        Match,
        Change,
        FileEnd(FileSpec)
    }
    // No Union for LineComp and FileSpec, so fold together
    #[derive(Debug)]
    struct LineComp {
        // don't store the string, as copying/borrowing gets involved, instead the cursor
        // ... we can get all the strings at the end when necessary
        cursor: Cursors,
        kind: CompType
    }
    
    
    // > States
    
    #[derive(Debug)]
    struct ChunkData {
        lines: Vec<LineComp>,
        cursor: Cursors,
        start_cursor: Cursors
    }
    
    impl ChunkData {
        fn set_cursor(&mut self, cursor: Cursors) {
            self.cursor = cursor;
        }
    }
    
    enum State {
        NewChunk {cursor: Cursors},
        NewLine {chunk_data: ChunkData},
        ContinuingChunk {chunk_data: ChunkData, line_read: LineComp},
        ContinuingChangeChunk {chunk_data: ChunkData, line_read: LineComp},
        EndingChunk {chunk_data: ChunkData, line_read: LineComp},
        EndingChangedChunk {chunk_data: ChunkData, chunk_type: ChunkType},
        // hmmm, LineComp type is actually LineComp with FileEnd as kind ... how type?
        FileEnd {chunk_data: ChunkData, line_read: FileSpec},
        FileEndChange {chunk_data: ChunkData},
        End
    }
    
    
    fn read_line(
        cursor: &Cursors, file_lens: &FileLens,
        a_lines: &Vec<&str>, b_lines: &Vec<&str>) -> LineComp {
    
        let a_end = ! (cursor.a < file_lens.a);
        let b_end = ! (cursor.b < file_lens.b);
    
        match (a_end, b_end) {
            (false, false) => {
                if a_lines[cursor.a] == b_lines[cursor.b] {
                    LineComp{cursor: cursor.clone(), kind: CompType::Match}
                }
                else {
                    LineComp{cursor: cursor.clone(), kind: CompType::Change}
                }
            },
            (false, true) => LineComp{cursor: cursor.clone(), kind: CompType::FileEnd(FileSpec::B)},
            (true, false) => LineComp{cursor: cursor.clone(), kind: CompType::FileEnd(FileSpec::A)},
            (true, true) => LineComp{cursor: cursor.clone(), kind: CompType::FileEnd(FileSpec::AB)},
        }
    
    }
    
    ...
    
    
    • maegul (he/they)OPM
      link
      fedilink
      English
      arrow-up
      1
      ·
      6 months ago

      … continued

      fn main() {
          // > Getting args
          let args: Vec<String> = env::args().collect();
      
          if args[1..].len() != 2 {
              panic!(
                  "Must provide two paths. Instead provided {:}",
                  args[1..].len()
              );
          }
      
          println!("Args:");
          for a in args[1..].iter() {
              println!("{}", a);
          }
      
          // > Reading files and splitting into lines
      
          let a = read_to_string(&args[1]).expect("Failed to read file");
          let b = read_to_string(&args[2]).expect("Failed to read file");
          let a_lines: Vec<&str> = a.split("\n").collect();
          let b_lines: Vec<&str> = b.split("\n").collect();
      
          // > Initialising globals
      
          let file_lengths = FileLens::new(&a_lines, &b_lines);
      
          let cursor = Cursors { a: 0, b: 0 };
          let mut chunks: Vec<Chunk> = vec![];
      
          // mut necessary as overwriting in each loop
          let mut state: State = State::NewChunk {cursor};
      
          // > Loop
          loop {
              state = match state {
                  State::NewChunk { cursor } => State::NewLine {
                      chunk_data: ChunkData{
                          lines: Vec::new(), cursor: cursor.clone(), start_cursor: cursor}
                  },
                  State::NewLine { chunk_data } => {
                      let line_read = read_line(&chunk_data.cursor, &file_lengths, &a_lines, &b_lines);
      
                      match chunk_data.lines.as_slice() {
                          [] => {
                              match line_read.kind {
                                  CompType::Match => State::ContinuingChunk {
                                      chunk_data, line_read },
                                  CompType::Change => State::ContinuingChunk {
                                      chunk_data, line_read },
                                  CompType::FileEnd(file_spec) => State::FileEnd {
                                      chunk_data, line_read: file_spec},
                              }
                          },
                          [.., lc] => {
                              match lc.kind {
                                  CompType::Match => {
                                      match line_read.kind {
                                          CompType::Match => State::ContinuingChunk {
                                              chunk_data, line_read },
                                          CompType::Change => State::EndingChunk {
                                              chunk_data, line_read },
                                          CompType::FileEnd(file_spec) => State::FileEnd {
                                              chunk_data, line_read: file_spec},
                                      }
                                  }
                                  CompType::Change => {
                                      match line_read.kind {
                                          CompType::Match => State::EndingChunk {
                                              chunk_data, line_read },
                                          CompType::Change => State::ContinuingChangeChunk {
                                              chunk_data, line_read },
                                          CompType::FileEnd(_) => State::FileEndChange {
                                              chunk_data},
                                      }
                                  }
                                  CompType::FileEnd(_) => panic!(
                                  // error! should not have come here from FileEnd
                                      "Failed to process file end correctly (failed at lines a:{},b:{})",
                                      line_read.cursor.a, line_read.cursor.b),
                              }
                          }
                      }
                  },
                  State::ContinuingChunk { mut chunk_data, line_read } => {
                      chunk_data.lines.push(line_read);
                      let new_cursor = chunk_data.cursor.increment_cursor(None);
                      chunk_data.set_cursor(new_cursor);
                      State::NewLine{chunk_data}
                  },
                  State::ContinuingChangeChunk { chunk_data, line_read } => {
                      let first_lc = chunk_data.lines.first().unwrap();
                      if a_lines[first_lc.cursor.a] == b_lines[line_read.cursor.b] {
                          State::EndingChangedChunk{
                              chunk_data, chunk_type: ChunkType::Addition
                          }
                      }
                      else if a_lines[line_read.cursor.a] == b_lines[first_lc.cursor.b] {
                          State::EndingChangedChunk{
                              chunk_data, chunk_type: ChunkType::Deletion
                          }
                      }
                      else {
                          State::ContinuingChunk { chunk_data, line_read }
                      }
                  },
                  State::EndingChunk { chunk_data, line_read } => {
                      let chunk_type = match line_read.kind {
                          CompType::Match => ChunkType::Modification,
                          CompType::Change => ChunkType::Match,
                          CompType::FileEnd(_) => panic!(
                                  // error! should not have come here from FileEnd
                                  "Failed to process file end correctly (failed at lines a:{},b:{})",
                                  line_read.cursor.a, line_read.cursor.b
                              )
                      };
                      let new_a_lines: Vec<String> = chunk_data.lines.iter()
                                                      .map(|lc| String::from(a_lines[lc.cursor.a]))
                                                      .collect();
                      let new_b_lines: Vec<String> = chunk_data.lines.iter()
                                                      .map(|lc| String::from(b_lines[lc.cursor.b]))
                                                      .collect();
                      chunks.push(
                          Chunk{
                              chunk_type,
                              a_lines: new_a_lines,
                              b_lines: new_b_lines,
                              cursor: chunk_data.start_cursor,
                          }
                      );
      
                      // continue from last read line, but with a new chunk
                      // ... repetitive yes, but cleaner code I think
                      State::NewChunk { cursor: line_read.cursor }
                  },
                  State::EndingChangedChunk { chunk_data, chunk_type } => {
      
                      let new_a_lines: Vec<String> = chunk_data.lines.iter()
                                                      .map(|lc| String::from(a_lines[lc.cursor.a]))
                                                      .collect();
                      let new_b_lines: Vec<String> = chunk_data.lines.iter()
                                                      .map(|lc| String::from(b_lines[lc.cursor.b]))
                                                      .collect();
                      chunks.push(
                          Chunk{
                              chunk_type,
                              a_lines: new_a_lines,
                              b_lines: new_b_lines,
                              cursor: chunk_data.start_cursor,
                          }
                      );
                      let new_cursor = chunk_data.cursor.increment_cursor(Some(chunks.last().unwrap()));
      
                      State::NewChunk { cursor: new_cursor }
                  },
                  State::FileEnd { chunk_data, line_read} => {
                      match line_read {
                          FileSpec::A => {
                              chunks.push(
                                  Chunk{
                                      chunk_type: ChunkType::Addition,
                                      a_lines: vec![],
                                      b_lines: b_lines[chunk_data.cursor.b..].iter()
                                                  .map(|s| String::from(*s)).collect(),
                                      cursor: chunk_data.cursor,
                                  }
                              );
                              State::End
                          },
                          FileSpec::B => {
                              chunks.push(
                                  Chunk{
                                      chunk_type: ChunkType::Deletion,
                                      a_lines: a_lines[chunk_data.cursor.a..].iter()
                                                  .map(|s| String::from(*s)).collect(),
                                      b_lines: vec![],
                                      cursor: chunk_data.cursor,
                                  }
                              );
                              State::End
                          },
                          FileSpec::AB => State::End,
                      }
                  },
                  State::FileEndChange { chunk_data } => {
                      let a_cursor = chunk_data.start_cursor.a.clone();
                      let b_cursor = chunk_data.start_cursor.b.clone();
                      chunks.push(
                          Chunk{
                              chunk_type: ChunkType::Deletion,
                              a_lines: a_lines[a_cursor..].iter()
                                          .map(|s| String::from(*s)).collect(),
                              b_lines: vec![],
                              cursor: chunk_data.start_cursor,
                          }
                      );
                      chunks.push(
                          Chunk{
                              chunk_type: ChunkType::Addition,
                              a_lines: vec![],
                              b_lines: b_lines[b_cursor..].iter()
                                          .map(|s| String::from(*s)).collect(),
                              cursor: chunk_data.cursor,
                          }
                      );
                      State::End
                  },
                  State::End => break,
              };
      
          }
      
          // > Wrap up
      
          println!("Done!");
      
          for (i,c) in chunks.iter().enumerate() {
              println!("\n--- Chunk: {} ---", i);
              println!("Type: {:?}", c.chunk_type);
              println!("A: {:?}", c.a_lines);
              println!("B: {:?}", c.b_lines);
          }
      }